sunholo 0.108.0__py3-none-any.whl → 0.109.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sunholo/genai/__init__.py CHANGED
@@ -1,3 +1,4 @@
1
1
  from .process_funcs_cls import GenAIFunctionProcessor
2
2
  from .safety import genai_safety
3
- from .init import init_genai
3
+ from .init import init_genai
4
+ from .file_handling import download_gcs_upload_genai, construct_file_content
@@ -0,0 +1,221 @@
1
+ from ..custom_logging import log
2
+ from ..gcs import get_bytes_from_gcs
3
+
4
+ from functools import partial
5
+ import mimetypes
6
+ import asyncio
7
+ import tempfile
8
+ import re
9
+ import os
10
+ import traceback
11
+ try:
12
+ import google.generativeai as genai
13
+ except ImportError:
14
+ genai = None
15
+
16
# MIME types grouped by media family. The four lists are merged into
# ALLOWED_MIME_TYPES, which construct_file_content() uses to decide whether
# a bucket file is eligible.

# Document / source-code / plain-text formats
DOCUMENT_MIMES = [
    "application/pdf",
    "application/x-javascript",
    "text/javascript",
    "application/x-python",
    "text/x-python",
    "text/plain",
    "text/html",
    "text/css",
    "text/md",
    "text/csv",
    "text/xml",
    "text/rtf",
]

# Still-image formats
IMAGE_MIMES = [
    "image/png",
    "image/jpeg",
    "image/webp",
    "image/heic",
    "image/heif",
]

# Video container formats
VIDEO_MIMES = [
    "video/mp4",
    "video/mpeg",
    "video/mov",
    "video/avi",
    "video/x-flv",
    "video/mpg",
    "video/webm",
    "video/wmv",
    "video/3gpp",
]

# Audio formats
AUDIO_MIMES = [
    "audio/wav",
    "audio/mp3",
    "audio/aiff",
    "audio/aac",
    "audio/ogg",
    "audio/flac",
]

# Union of every family above, as a set for O(1) membership tests
ALLOWED_MIME_TYPES = {*AUDIO_MIMES, *VIDEO_MIMES, *IMAGE_MIMES, *DOCUMENT_MIMES}
61
+
62
+ # 'documents':
63
+ # [
64
+ # {'storagePath': 'users/UQcKi4u7s...dsd.png',
65
+ # 'url': 'https://firebasestorage.googleapis.com/v0/b/multi...',
66
+ # 'contentType': 'image/png',
67
+ # 'type': 'image',
68
+ # 'name': 'multivac-data-architecture.png'},
69
+ # {'storagePath': 'users/UQc...3dc59e1.jpg',
70
+ # 'type': 'image',
71
+ # 'name': 'holosun-circle.jpg',
72
+ # 'url': 'https://firebasestorage.googleapis.com/v0/b/multiv...',
73
+ # 'contentType': 'image/jpeg'}
74
+ # ]
75
+
76
def sanitize_file(filename):
    """
    Reduce a filename to a lowercase identifier made of ``a-z``, ``0-9`` and ``-``.

    The file extension is dropped (it is NOT reattached), every character
    outside ``[a-z0-9-]`` is removed - including path separators, spaces,
    dots and underscores - and leading/trailing dashes are stripped.

    NOTE(review): presumably this shape is required for genai file names
    (see the ``genai.get_file(name)`` / ``genai.upload_file(name=...)`` callers)
    - confirm against the genai API before relaxing it. Because path separators
    are removed, the result is not usable as a filesystem path.

    Args:
        filename: str file name or path to sanitise.

    Returns:
        str: the sanitised name; may be empty if nothing valid remains.
    """
    # Only the stem is kept; the extension is intentionally discarded
    stem, _ = os.path.splitext(filename)

    # Lowercase first so upper-case letters are preserved rather than removed
    cleaned = re.sub(r'[^a-z0-9-]', '', stem.lower())

    # Names must not start or end with a dash
    return cleaned.strip('-')
86
+
87
async def construct_file_content(gs_list, bucket:str):
    """
    Build genai chat content entries for files stored in a GCS bucket.

    Entries without a ``contentType`` or ``storagePath``, or whose
    ``contentType`` is not in ``ALLOWED_MIME_TYPES``, are ignored. For every
    remaining file the sanitised name is first looked up with
    ``genai.get_file``; when that lookup raises, the file is scheduled to be
    downloaded from GCS and uploaded to genai. All scheduled uploads are
    awaited together.

    Args:
    - gs_list: a list of dicts representing files in a bucket
      - contentType: The content type of the file on GCS
      - storagePath: The path in the bucket
      - name: The name of the file (falls back to the basename of
        storagePath when missing or empty)
    - bucket: The bucket the files are in

    Returns:
    - A single ``{"role": "user", "parts": [...]}`` dict when no file is
      eligible, otherwise a list of such dicts: files that already existed in
      genai first (in input order), followed by the upload results (in input
      order). A file whose download/upload fails contributes an error-text
      entry instead of raising, so one bad file does not discard the rest.
    """

    # `None not in ALLOWED_MIME_TYPES`, so a missing contentType is rejected too
    file_list = [
        element for element in (gs_list or [])
        if element.get('storagePath') is not None
        and element.get('contentType') in ALLOWED_MIME_TYPES
    ]

    if not file_list:
        return {"role": "user", "parts": [{"text": "No eligible contentTypes were found"}]}

    content = []

    # Loop through the valid files and process them
    tasks = []
    for file_info in file_list:
        storage_path = file_info['storagePath']
        gs_url = f"gs://{bucket}/{storage_path}"
        mime_type = file_info['contentType']
        # 'name' is not validated by the filter above, so do not index it blindly
        name = sanitize_file(file_info.get('name') or os.path.basename(storage_path))
        log.info(f"Processing {name=}")
        try:
            # get_file is a blocking call; keep it off the event loop
            myfile = await asyncio.to_thread(genai.get_file, name)
        except Exception as e:
            log.info(f"Not found checking genai.get_file: '{name}' {str(e)}")
            # The wrapper converts a final failure into an error-text entry so
            # asyncio.gather below cannot abort the whole batch
            tasks.append(download_file_with_error_handling(gs_url, mime_type, name))
            continue

        content.append(
            {"role": "user", "parts": [
                {"file_data": myfile},
                {"text": f"You have been given the ability to work with file '{name}' with {mime_type=}."}
            ]
        })
        log.info(f"Found existing genai.get_file {name=}")

    # Run all tasks in parallel
    if tasks:
        task_content = await asyncio.gather(*tasks)
        content.extend(task_content)

    return content
141
+
142
+ # Helper function to handle each file download with error handling
143
async def download_file_with_error_handling(img_url, mime_type, name):
    """
    Run ``download_gcs_upload_genai`` and turn any exception into a text entry.

    Args:
    - img_url: URL of the file, passed through to download_gcs_upload_genai.
    - mime_type: MIME type of the file, passed through unchanged.
    - name: name for the uploaded file, passed through unchanged.

    Returns:
    - Whatever download_gcs_upload_genai returns, or, if it raises, a
      ``{"role": "user", "parts": [{"text": ...}]}`` dict carrying the logged
      error message.
    """
    try:
        outcome = await download_gcs_upload_genai(img_url, mime_type, name)
    except Exception as exc:
        failure = f"Error processing file from {img_url}: {str(exc)}"
        log.error(failure)
        outcome = {"role": "user", "parts": [{"text": failure}]}
    return outcome
150
+
151
async def download_gcs_upload_genai(img_url, mime_type, name=None, retries=3, delay=2):
    """
    Downloads a file from GCS and uploads it to genai, with retries in case of failure.

    The bytes are written to a temporary file (suffix guessed from
    ``mime_type``) which is handed to ``genai.upload_file`` and removed again
    once the upload attempt has finished.

    Args:
    - img_url: str The URL of the file to download.
    - mime_type: str The MIME type of the file.
    - name: str Optional name passed to genai.upload_file
      (presumably genai generates one when it is None - confirm).
    - retries: int Number of attempts before failing. With 0 the loop never
      runs and None is returned.
    - delay: int Initial delay in seconds between retries, doubled after each failure.

    Returns:
    - dict: ``{"role": "user", "parts": [...]}``. On success the parts hold the
      uploaded file reference plus a descriptive text; if the download is
      empty, the file is too large, or the upload itself fails, the parts hold
      a single explanatory text instead.

    Raises:
    - Exception: the error from the last attempt when every attempt failed
      outside the upload call (e.g. the download or the temp-file write).
    """
    import aiofiles
    from google.generativeai.types import file_types

    # Upper bound on the raw file size accepted for direct upload.
    # NOTE(review): origin of this exact figure is not visible here - confirm.
    max_file_bytes = 19434343

    for attempt in range(retries):
        try:
            log.info(f"Upload {attempt} for {img_url=}")
            # Download the file bytes without blocking the event loop
            file_bytes = await asyncio.to_thread(get_bytes_from_gcs, img_url)
            if not file_bytes:
                msg = f"Failed to download file from {img_url}: got None"
                log.warning(msg)
                return {"role": "user", "parts": [{"text": msg}]}

            # Log the size of the file bytes
            file_size = len(file_bytes)
            log.info(f"Downloaded file size for {img_url}: {file_size} bytes")

            if file_size > max_file_bytes:
                log.warning(f"File size for {img_url}: {file_size} is too big.")
                msg = f"The file for {img_url} is too large ({file_size} bytes) to be used directly. Use RAG instead."
                return {"role": "user", "parts": [{"text": msg}]}

            # May be None for unknown MIME types; NamedTemporaryFile accepts that
            extension = mimetypes.guess_extension(mime_type)

            # Reserve a unique path and close the handle straight away so the
            # descriptor is not leaked; the path itself is used as-is (it must
            # not be passed through sanitize_file, which strips the directory
            # separators and the extension).
            with tempfile.NamedTemporaryFile(delete=False, suffix=extension) as temp_file:
                downloaded_file = temp_file.name

            try:
                log.info(f"Writing file {downloaded_file}")
                # Use aiofiles for asynchronous file operations
                async with aiofiles.open(downloaded_file, 'wb') as f:
                    await f.write(file_bytes)

                # Upload the file and get its content reference
                try:
                    downloaded_content: file_types.File = await asyncio.to_thread(
                        partial(genai.upload_file, name=name, mime_type=mime_type),
                        downloaded_file
                    )
                    return {"role": "user", "parts": [{"file_data": downloaded_content},
                                                      {"text": f"You have been given the ability to read and work with filename '{name}' with {mime_type=}."}
                                                      ]}
                except Exception as err:
                    msg = f"Could not upload {downloaded_file} to genai.upload_file: {str(err)} {traceback.format_exc()}"
                    log.error(msg)
                    return {"role": "user", "parts": [{"text": msg}]}
            finally:
                # delete=False means nothing else removes the temp file
                try:
                    os.remove(downloaded_file)
                except OSError as cleanup_err:
                    log.warning(f"Could not remove temporary file {downloaded_file}: {str(cleanup_err)}")

        except Exception as err:
            log.error(f"Error processing file {img_url} on attempt {attempt + 1}/{retries}: {str(err)}")

            if attempt < retries - 1:
                log.info(f"Retrying in {delay} seconds...")
                await asyncio.sleep(delay)
                delay *= 2  # Exponential backoff
            else:
                raise  # Re-raise the last error after max retries
221
+
@@ -6,6 +6,8 @@ try:
6
6
  import sounddevice as sd
7
7
  except ImportError:
8
8
  sd = None
9
+ except OSError:
10
+ sd = None
9
11
 
10
12
  try:
11
13
  import numpy as np
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sunholo
3
- Version: 0.108.0
3
+ Version: 0.109.3
4
4
  Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
5
5
  Home-page: https://github.com/sunholo-data/sunholo-py
6
- Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.108.0.tar.gz
6
+ Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.109.3.tar.gz
7
7
  Author: Holosun ApS
8
8
  Author-email: multivac@sunholo.com
9
9
  License: Apache License, Version 2.0
@@ -21,8 +21,9 @@ License-File: LICENSE.txt
21
21
  Requires-Dist: google-auth
22
22
  Requires-Dist: ruamel.yaml
23
23
  Requires-Dist: langchain ==0.2.16
24
- Requires-Dist: langchain-experimental >=0.0.61
25
- Requires-Dist: langchain-community >=0.2.11
24
+ Requires-Dist: langchain-experimental ==0.0.65
25
+ Requires-Dist: langchain-community ==0.2.17
26
+ Requires-Dist: langsmith ==0.1.140
26
27
  Provides-Extra: all
27
28
  Requires-Dist: anthropic[vertex] ; extra == 'all'
28
29
  Requires-Dist: asyncpg ; extra == 'all'
@@ -50,7 +51,7 @@ Requires-Dist: httpcore ; extra == 'all'
50
51
  Requires-Dist: httpx ; extra == 'all'
51
52
  Requires-Dist: jsonschema ; extra == 'all'
52
53
  Requires-Dist: lancedb ; extra == 'all'
53
- Requires-Dist: langchain >=0.2.12 ; extra == 'all'
54
+ Requires-Dist: langchain >=0.2.16 ; extra == 'all'
54
55
  Requires-Dist: langchain-experimental >=0.0.61 ; extra == 'all'
55
56
  Requires-Dist: langchain-community >=0.2.11 ; extra == 'all'
56
57
  Requires-Dist: langchain-openai ==0.1.25 ; extra == 'all'
@@ -85,7 +85,8 @@ sunholo/gcs/download_folder.py,sha256=ijJTnS595JqZhBH8iHFErQilMbkuKgL-bnTCMLGuvl
85
85
  sunholo/gcs/download_url.py,sha256=Ul81n1rklr8WogPsuxWWD1Nr8RHU451LzHPMJNhAKzw,6416
86
86
  sunholo/gcs/extract_and_sign.py,sha256=paRrTCvCN5vkQwCB7OSkxWi-pfOgOtZ0bwdXE08c3Ps,1546
87
87
  sunholo/gcs/metadata.py,sha256=oQLcXi4brsZ74aegWyC1JZmhlaEV270HS5_UWtAYYWE,898
88
- sunholo/genai/__init__.py,sha256=dBl6IA3-Fx6-Vx81r0XqxHlUq6WeW1iDX188dpChu8s,115
88
+ sunholo/genai/__init__.py,sha256=6SWK7uV5F625J-P3xQoD6WKL59a9RSaidj-Guslyt8Q,192
89
+ sunholo/genai/file_handling.py,sha256=DRMZlqZtZNjNmM033gvVqqxxO7J_Xvz_K78mOY5nxTQ,7496
89
90
  sunholo/genai/images.py,sha256=EyjsDqt6XQw99pZUQamomCpMOoIah9bp3XY94WPU7Ms,1678
90
91
  sunholo/genai/init.py,sha256=yG8E67TduFCTQPELo83OJuWfjwTnGZsyACospahyEaY,687
91
92
  sunholo/genai/process_funcs_cls.py,sha256=7_RQMqIAZ3nPP-GFgCHBvS39fwuWuGtvSyuJaJN_G3E,31590
@@ -116,7 +117,7 @@ sunholo/qna/__init__.py,sha256=F8q1uR_HreoSX0IfmKY1qoSwIgXhO2Q8kuDSxh9_-EE,28
116
117
  sunholo/qna/parsers.py,sha256=YpOaK5S_LxJ6FbliSYDc3AVOJ62RVduayoNnzi_p8CM,2494
117
118
  sunholo/qna/retry.py,sha256=yMw7RTkw-RXCzfENPJOt8c32mXlpvOR589EGkvK-6yI,2028
118
119
  sunholo/senses/__init__.py,sha256=fbWqVwwzkV5uRSb8lQzo4pn0ja_VYVWbUYapurSowBs,39
119
- sunholo/senses/stream_voice.py,sha256=tQL5TDH43OuXRlxWRLiIAceYApXyqNczN1D3jUiNMUo,18092
120
+ sunholo/senses/stream_voice.py,sha256=VpCmooEKghBT1jPJe9mX7gKggGqY6qt-bpO7hwY4sPE,18122
120
121
  sunholo/streaming/__init__.py,sha256=MpbydI2UYo_adttPQFkxNM33b-QRyNEbrKJx0C2AGPc,241
121
122
  sunholo/streaming/content_buffer.py,sha256=0LHMwH4ctq5kjhIgMFNH0bA1RL0jMISlLVzzLcFrvv4,12766
122
123
  sunholo/streaming/langserve.py,sha256=hi7q8WY8DPKrALl9m_dOMxWOdE-iEuk7YW05SVDFIX8,6514
@@ -149,9 +150,9 @@ sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
149
150
  sunholo/vertex/memory_tools.py,sha256=tBZxqVZ4InTmdBvLlOYwoSEWu4-kGquc-gxDwZCC4FA,7667
150
151
  sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
151
152
  sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
152
- sunholo-0.108.0.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
153
- sunholo-0.108.0.dist-info/METADATA,sha256=NoRyTCRajNC5x0uAVpyUs8Ogp9oCuChGi2rcMdpG5QQ,8670
154
- sunholo-0.108.0.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
155
- sunholo-0.108.0.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
156
- sunholo-0.108.0.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
157
- sunholo-0.108.0.dist-info/RECORD,,
153
+ sunholo-0.109.3.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
154
+ sunholo-0.109.3.dist-info/METADATA,sha256=kcRA_wA-X5fPF1zpAU-nyDdHmTVgwUHpDQgqeYcmQH0,8705
155
+ sunholo-0.109.3.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
156
+ sunholo-0.109.3.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
157
+ sunholo-0.109.3.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
158
+ sunholo-0.109.3.dist-info/RECORD,,