sunholo 0.109.1__py3-none-any.whl → 0.109.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,18 +1,18 @@
1
1
  from ..custom_logging import log
2
2
  from ..gcs import get_bytes_from_gcs
3
3
 
4
+ from functools import partial
4
5
  import mimetypes
5
6
  import asyncio
6
7
  import tempfile
7
8
  import re
9
+ import os
8
10
  import traceback
9
11
  try:
10
12
  import google.generativeai as genai
11
- from google.generativeai.types import file_types
12
13
  except ImportError:
13
14
  genai = None
14
- file_types = None
15
-
15
+
16
16
  DOCUMENT_MIMES = [
17
17
  'application/pdf',
18
18
  'application/x-javascript',
@@ -72,12 +72,25 @@ ALLOWED_MIME_TYPES = set(AUDIO_MIMES + VIDEO_MIMES + IMAGE_MIMES + DOCUMENT_MIME
72
72
  # 'url': 'https://firebasestorage.googleapis.com/v0/b/multiv...',
73
73
  # 'contentType': 'image/jpeg'}
74
74
  # ]
75
+
76
def sanitize_file(filename):
    """
    Reduce a filename to a lowercase identifier made of a-z, 0-9 and dashes.

    Args:
    - filename: str The file name (or path) to sanitize.

    Returns:
    - str The sanitized stem. The extension split off by os.path.splitext
      is dropped and is NOT reattached. Every character outside [a-z0-9-]
      is removed (including dots, underscores, spaces and path separators),
      and leading/trailing dashes are stripped. The result may be an empty
      string when nothing valid remains.

    NOTE(review): callers in this file use the result both as a genai file
    name and as a local path to write to; because directories and the
    extension are removed, the latter resolves relative to the current
    working directory - confirm that is intended.
    """
    # Only the stem is kept; the extension is intentionally discarded.
    stem, _extension = os.path.splitext(filename)

    # Lowercase first so upper-case letters are kept rather than removed.
    sanitized_name = re.sub(r'[^a-z0-9-]', '', stem.lower())

    # After the substitution only [a-z0-9-] remain, so a plain strip is
    # equivalent to removing leading/trailing runs of dashes by regex.
    return sanitized_name.strip('-')
86
+
75
87
  async def construct_file_content(gs_list, bucket:str):
76
88
  """
77
89
  Args:
78
90
  - gs_list: a list of dicts representing files in a bucket
79
91
  - contentType: The content type of the file on GCS
80
92
  - storagePath: The path in the bucket
93
+ - name: The name of the file
81
94
  - bucket: The bucket the files are in
82
95
 
83
96
  """
@@ -102,32 +115,50 @@ async def construct_file_content(gs_list, bucket:str):
102
115
  tasks = []
103
116
  for file_info in file_list:
104
117
  img_url = f"gs://{bucket}/{file_info['storagePath']}"
118
+ display_url = file_info.get('url')
105
119
  mime_type = file_info['contentType']
106
- # Append the async download task to the task list
107
- tasks.append(download_gcs_upload_genai(img_url, mime_type))
120
+ name = sanitize_file(file_info['name'])
121
+ log.info(f"Processing {name=}")
122
+ try:
123
+ myfile = genai.get_file(name)
124
+ content.append(
125
+ {"role": "user", "parts": [
126
+ {"file_data": myfile},
127
+ {"text": f"You have been given the ability to work with file {name=} with {mime_type=} {display_url=}"}
128
+ ]
129
+ })
130
+ log.info(f"Found existing genai.get_file {name=}")
131
+
132
+ except Exception as e:
133
+ log.info(f"Not found checking genai.get_file: '{name}' {str(e)}")
134
+ tasks.append(download_gcs_upload_genai(img_url, mime_type, name=name))
108
135
 
109
136
  # Run all tasks in parallel
110
- content = await asyncio.gather(*tasks)
137
+ if tasks:
138
+ task_content = await asyncio.gather(*tasks)
139
+ content.extend(task_content)
111
140
 
112
141
  return content
113
142
 
114
143
  # Helper function to handle each file download with error handling
115
- async def download_file_with_error_handling(img_url, mime_type):
144
+ async def download_file_with_error_handling(img_url, mime_type, name):
116
145
  try:
117
- return await download_gcs_upload_genai(img_url, mime_type)
146
+ return await download_gcs_upload_genai(img_url, mime_type, name)
118
147
  except Exception as err:
119
148
  msg= f"Error processing file from {img_url}: {str(err)}"
120
149
  log.error(msg)
121
150
  return {"role": "user", "parts": [{"text": msg}]}
122
151
 
123
- async def download_gcs_upload_genai(img_url, mime_type, retries=3, delay=2):
152
+ async def download_gcs_upload_genai(img_url, mime_type, name=None, retries=3, delay=2):
124
153
  import aiofiles
154
+ from google.generativeai.types import file_types
125
155
  """
126
156
  Downloads and uploads a file with retries in case of failure.
127
157
 
128
158
  Args:
129
159
  - img_url: str The URL of the file to download.
130
160
  - mime_type: str The MIME type of the file.
161
+ - name: str Optional name, else a random one will be created
131
162
  - retries: int Number of retry attempts before failing.
132
163
  - delay: int Initial delay between retries, exponentially increasing.
133
164
 
@@ -159,7 +190,7 @@ async def download_gcs_upload_genai(img_url, mime_type, retries=3, delay=2):
159
190
  temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=extension)
160
191
  downloaded_file = temp_file.name
161
192
 
162
- sanitized_file = re.sub(r'[^\w\-.]', '_', downloaded_file)
193
+ sanitized_file = sanitize_file(downloaded_file)
163
194
 
164
195
  log.info(f"Writing file {sanitized_file}")
165
196
  async with aiofiles.open(sanitized_file, 'wb') as f:
@@ -167,8 +198,13 @@ async def download_gcs_upload_genai(img_url, mime_type, retries=3, delay=2):
167
198
 
168
199
  # Upload the file and get its content reference
169
200
  try:
170
- downloaded_content: file_types.File = await asyncio.to_thread(genai.upload_file, sanitized_file )
171
- return {"role": "user", "parts": [{"file_data": downloaded_content}]}
201
+ downloaded_content: file_types.File = await asyncio.to_thread(
202
+ partial(genai.upload_file, name=name, mime_type=mime_type),
203
+ sanitized_file
204
+ )
205
+ return {"role": "user", "parts": [{"file_data": downloaded_content},
206
+ {"text": f"You have been given the ability to read and work with filename '{name}' with {mime_type=}."}
207
+ ]}
172
208
  except Exception as err:
173
209
  msg = f"Could not upload {sanitized_file} to genai.upload_file: {str(err)} {traceback.format_exc()}"
174
210
  log.error(msg)
@@ -6,6 +6,8 @@ try:
6
6
  import sounddevice as sd
7
7
  except ImportError:
8
8
  sd = None
9
+ except OSError:
10
+ sd = None
9
11
 
10
12
  try:
11
13
  import numpy as np
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sunholo
3
- Version: 0.109.1
3
+ Version: 0.109.4
4
4
  Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
5
5
  Home-page: https://github.com/sunholo-data/sunholo-py
6
- Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.109.1.tar.gz
6
+ Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.109.4.tar.gz
7
7
  Author: Holosun ApS
8
8
  Author-email: multivac@sunholo.com
9
9
  License: Apache License, Version 2.0
@@ -21,8 +21,9 @@ License-File: LICENSE.txt
21
21
  Requires-Dist: google-auth
22
22
  Requires-Dist: ruamel.yaml
23
23
  Requires-Dist: langchain ==0.2.16
24
- Requires-Dist: langchain-experimental >=0.0.61
25
- Requires-Dist: langchain-community >=0.2.11
24
+ Requires-Dist: langchain-experimental ==0.0.65
25
+ Requires-Dist: langchain-community ==0.2.17
26
+ Requires-Dist: langsmith ==0.1.140
26
27
  Provides-Extra: all
27
28
  Requires-Dist: anthropic[vertex] ; extra == 'all'
28
29
  Requires-Dist: asyncpg ; extra == 'all'
@@ -50,7 +51,7 @@ Requires-Dist: httpcore ; extra == 'all'
50
51
  Requires-Dist: httpx ; extra == 'all'
51
52
  Requires-Dist: jsonschema ; extra == 'all'
52
53
  Requires-Dist: lancedb ; extra == 'all'
53
- Requires-Dist: langchain >=0.2.12 ; extra == 'all'
54
+ Requires-Dist: langchain >=0.2.16 ; extra == 'all'
54
55
  Requires-Dist: langchain-experimental >=0.0.61 ; extra == 'all'
55
56
  Requires-Dist: langchain-community >=0.2.11 ; extra == 'all'
56
57
  Requires-Dist: langchain-openai ==0.1.25 ; extra == 'all'
@@ -86,7 +86,7 @@ sunholo/gcs/download_url.py,sha256=Ul81n1rklr8WogPsuxWWD1Nr8RHU451LzHPMJNhAKzw,6
86
86
  sunholo/gcs/extract_and_sign.py,sha256=paRrTCvCN5vkQwCB7OSkxWi-pfOgOtZ0bwdXE08c3Ps,1546
87
87
  sunholo/gcs/metadata.py,sha256=oQLcXi4brsZ74aegWyC1JZmhlaEV270HS5_UWtAYYWE,898
88
88
  sunholo/genai/__init__.py,sha256=6SWK7uV5F625J-P3xQoD6WKL59a9RSaidj-Guslyt8Q,192
89
- sunholo/genai/file_handling.py,sha256=b3vT_MIsinJSMqEa1MoelmABPTZH5iXNdCLvA6z7Qg8,5995
89
+ sunholo/genai/file_handling.py,sha256=cGQeDvB93-3XgHRA020E7344yG7_EgX0KAsJLitQJBg,7552
90
90
  sunholo/genai/images.py,sha256=EyjsDqt6XQw99pZUQamomCpMOoIah9bp3XY94WPU7Ms,1678
91
91
  sunholo/genai/init.py,sha256=yG8E67TduFCTQPELo83OJuWfjwTnGZsyACospahyEaY,687
92
92
  sunholo/genai/process_funcs_cls.py,sha256=7_RQMqIAZ3nPP-GFgCHBvS39fwuWuGtvSyuJaJN_G3E,31590
@@ -117,7 +117,7 @@ sunholo/qna/__init__.py,sha256=F8q1uR_HreoSX0IfmKY1qoSwIgXhO2Q8kuDSxh9_-EE,28
117
117
  sunholo/qna/parsers.py,sha256=YpOaK5S_LxJ6FbliSYDc3AVOJ62RVduayoNnzi_p8CM,2494
118
118
  sunholo/qna/retry.py,sha256=yMw7RTkw-RXCzfENPJOt8c32mXlpvOR589EGkvK-6yI,2028
119
119
  sunholo/senses/__init__.py,sha256=fbWqVwwzkV5uRSb8lQzo4pn0ja_VYVWbUYapurSowBs,39
120
- sunholo/senses/stream_voice.py,sha256=tQL5TDH43OuXRlxWRLiIAceYApXyqNczN1D3jUiNMUo,18092
120
+ sunholo/senses/stream_voice.py,sha256=VpCmooEKghBT1jPJe9mX7gKggGqY6qt-bpO7hwY4sPE,18122
121
121
  sunholo/streaming/__init__.py,sha256=MpbydI2UYo_adttPQFkxNM33b-QRyNEbrKJx0C2AGPc,241
122
122
  sunholo/streaming/content_buffer.py,sha256=0LHMwH4ctq5kjhIgMFNH0bA1RL0jMISlLVzzLcFrvv4,12766
123
123
  sunholo/streaming/langserve.py,sha256=hi7q8WY8DPKrALl9m_dOMxWOdE-iEuk7YW05SVDFIX8,6514
@@ -150,9 +150,9 @@ sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
150
150
  sunholo/vertex/memory_tools.py,sha256=tBZxqVZ4InTmdBvLlOYwoSEWu4-kGquc-gxDwZCC4FA,7667
151
151
  sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
152
152
  sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
153
- sunholo-0.109.1.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
154
- sunholo-0.109.1.dist-info/METADATA,sha256=U-SgUIRCDApHuOqKQPELVpyiTMqQo9TQ2E2HTNA0WuQ,8670
155
- sunholo-0.109.1.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
156
- sunholo-0.109.1.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
157
- sunholo-0.109.1.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
158
- sunholo-0.109.1.dist-info/RECORD,,
153
+ sunholo-0.109.4.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
154
+ sunholo-0.109.4.dist-info/METADATA,sha256=_nE3mKEViVRioU2NglcZCIXJ0PMqdsM7lYi__xkj8ao,8705
155
+ sunholo-0.109.4.dist-info/WHEEL,sha256=a7TGlA-5DaHMRrarXjVbQagU3Man_dCnGIWMJr5kRWo,91
156
+ sunholo-0.109.4.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
157
+ sunholo-0.109.4.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
158
+ sunholo-0.109.4.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.3.0)
2
+ Generator: setuptools (75.4.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5