sunholo 0.119.16__py3-none-any.whl → 0.120.0__py3-none-any.whl

This diff shows the differences between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in their respective public registries.
sunholo/auth/refresh.py CHANGED
@@ -9,8 +9,9 @@ from ..custom_logging import log
9
9
  def get_default_email():
10
10
 
11
11
  # https://stackoverflow.com/questions/64234214/how-to-generate-a-blob-signed-url-in-google-cloud-run
12
-
13
- gcs_credentials, project_id = refresh_credentials()
12
+ gcs_credentials, project_id = get_default_creds()
13
+ if gcs_credentials is None:
14
+ gcs_credentials, project_id = refresh_credentials()
14
15
 
15
16
  if gcs_credentials is None:
16
17
  log.error("Could not refresh the credentials properly.")
@@ -344,6 +344,7 @@ class DiscoveryEngineClient:
344
344
  return search_response
345
345
 
346
346
  def chunk_format(self, chunk):
347
+
347
348
  return (
348
349
  f"# {chunk.id}\n"
349
350
  f"{chunk.content}\n"
@@ -351,7 +352,7 @@ class DiscoveryEngineClient:
351
352
  f"Relevance score: {chunk.relevance_score}\n"
352
353
  f"Document URI: {chunk.document_metadata.uri}\n"
353
354
  f"Document Title: {chunk.document_metadata.title}\n"
354
- f"Document Metadata: {chunk.document_metadata.struct_data.dict()}\n"
355
+ f"Document Metadata: {dict(chunk.document_metadata.struct_data)}\n"
355
356
  )
356
357
 
357
358
  def process_chunks(self, response):
@@ -3,6 +3,7 @@ from ..gcs import get_bytes_from_gcs
3
3
 
4
4
  from functools import partial
5
5
  import mimetypes
6
+ import uuid
6
7
  import asyncio
7
8
  import tempfile
8
9
  import re
@@ -11,6 +12,7 @@ import traceback
11
12
  try:
12
13
  import google.generativeai as genai
13
14
  from google import genai as genaiv2
15
+
14
16
  except ImportError:
15
17
  genai = None
16
18
  genaiv2 = None
@@ -89,20 +91,16 @@ def sanitize_file(filename):
89
91
 
90
92
  async def construct_file_content(gs_list, bucket:str, genai_lib=False):
91
93
  """
94
+ Thread-safe implementation for processing multiple files concurrently.
95
+
92
96
  Args:
93
97
  - gs_list: a list of dicts representing files in a bucket
94
- - contentType: The content type of the file on GCS
95
- - storagePath: The path in the bucket
96
- - name: The name of the file
97
- - url: The URL of the file that can be used to display the contents
98
98
  - bucket: The bucket the files are in
99
- - genai: whether its using the genai SDK
100
-
99
+ - genai_lib: whether its using the genai SDK
101
100
  """
102
101
 
103
102
  file_list = []
104
103
  for element in gs_list:
105
-
106
104
  the_mime_type = element.get('contentType')
107
105
  if the_mime_type is None:
108
106
  continue
@@ -124,34 +122,42 @@ async def construct_file_content(gs_list, bucket:str, genai_lib=False):
124
122
  img_url = f"gs://{bucket}/{file_info['storagePath']}"
125
123
  display_url = file_info.get('url')
126
124
  mime_type = file_info['contentType']
127
- name = sanitize_file(file_info['name'])
125
+ # Generate a unique name for each file to avoid conflicts
126
+ original_name = sanitize_file(file_info['name'])
127
+ unique_name = f"{original_name}_{str(uuid.uuid4())[:8]}"
128
128
  display_name = file_info['name']
129
- log.info(f"Processing {name=} {display_name=}")
129
+ log.info(f"Processing {unique_name=} {display_name=}")
130
+
130
131
  try:
131
132
  if not genai_lib:
132
- myfile = genai.get_file(name)
133
+ myfile = genai.get_file(unique_name)
133
134
  else:
134
135
  client = genaiv2.Client()
135
- myfile = client.files.get(name=name)
136
+ myfile = client.files.get(name=unique_name)
136
137
  content.append(myfile)
137
138
  content.append(f"You have been given the ability to work with file {display_name=} with {mime_type=} {display_url=}")
138
- log.info(f"Found existing genai.get_file {name=}")
139
+ log.info(f"Found existing genai.get_file {unique_name=}")
139
140
  except Exception as e:
140
- log.info(f"Not found checking genai.get_file: '{name}' {str(e)}")
141
+ log.info(f"Not found checking genai.get_file: '{unique_name}' {str(e)}")
141
142
  tasks.append(
142
143
  download_gcs_upload_genai(img_url,
143
- mime_type=mime_type,
144
- name=name,
145
- display_url=display_url,
146
- display_name=display_name,
147
- genai_lib=genai_lib)
148
- )
149
-
150
- # Run all tasks in parallel
151
- if tasks:
152
- task_content = await asyncio.gather(*tasks)
153
- content.extend(task_content)
144
+ mime_type=mime_type,
145
+ name=unique_name,
146
+ display_url=display_url,
147
+ display_name=display_name,
148
+ genai_lib=genai_lib)
149
+ )
154
150
 
151
+ # Process files in batches to avoid overwhelming the system
152
+ content_results = []
153
+ batch_size = 3 # Process 3 files at a time
154
+
155
+ for i in range(0, len(tasks), batch_size):
156
+ batch = tasks[i:i+batch_size]
157
+ batch_results = await asyncio.gather(*batch)
158
+ content_results.extend(batch_results)
159
+
160
+ content.extend(content_results)
155
161
  return content
156
162
 
157
163
  # Helper function to handle each file download with error handling
@@ -169,10 +175,9 @@ async def download_gcs_upload_genai(img_url,
169
175
  display_url=None,
170
176
  display_name=None,
171
177
  retries=3, delay=2, genai_lib=False):
172
- import aiofiles
173
- from google.generativeai.types import file_types
174
178
  """
175
179
  Downloads and uploads a file with retries in case of failure.
180
+ Thread-safe implementation using unique file paths.
176
181
 
177
182
  Args:
178
183
  - img_url: str The URL of the file to download.
@@ -184,9 +189,10 @@ async def download_gcs_upload_genai(img_url,
184
189
  Returns:
185
190
  - downloaded_content: The result of the file upload if successful.
186
191
  """
192
+ import aiofiles
187
193
  for attempt in range(retries):
188
194
  try:
189
- log.info(f"Upload {attempt} for {img_url=}")
195
+ log.info(f"Upload attempt [{attempt}] for {img_url=}")
190
196
  # Download the file bytes asynchronously
191
197
  file_bytes = await asyncio.to_thread(get_bytes_from_gcs, img_url)
192
198
  if not file_bytes:
@@ -200,50 +206,72 @@ async def download_gcs_upload_genai(img_url,
200
206
 
201
207
  if file_size > 19434343:
202
208
  log.warning(f"File size for {img_url}: {file_size} is too big.")
203
- msg = f"The file for {img_url} is too large ({file_size} bytes) to be used directly. Use RAG instead or {display_url=}"
209
+ msg = f"The file for {img_url} is too large ({file_size} bytes) to be used directly. Use RAG instead or {display_url=}"
204
210
  return {"role": "user", "parts": [{"text": msg}]}
205
211
 
206
212
  extension = mimetypes.guess_extension(mime_type)
207
213
 
208
- # Use aiofiles for asynchronous file operations
209
- temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=extension)
210
- downloaded_file = temp_file.name
211
-
212
- sanitized_file = sanitize_file(downloaded_file)
213
-
214
- log.info(f"Writing file {sanitized_file}")
215
- async with aiofiles.open(sanitized_file, 'wb') as f:
214
+ # Create a unique directory for this upload task
215
+ unique_id = str(uuid.uuid4())
216
+ temp_dir = os.path.join(tempfile.gettempdir(), f"upload_{unique_id}")
217
+ os.makedirs(temp_dir, exist_ok=True)
218
+
219
+ # Create a file with unique path
220
+ file_path = os.path.join(temp_dir, f"file_{unique_id}{extension}")
221
+
222
+ log.info(f"Writing file {file_path}")
223
+ async with aiofiles.open(file_path, 'wb') as f:
216
224
  await f.write(file_bytes)
217
225
 
218
- # Upload the file and get its content reference
219
- if not genai_lib:
220
- try:
221
- downloaded_content: file_types.File = await asyncio.to_thread(
226
+ try:
227
+ if not genai_lib:
228
+ downloaded_content = await asyncio.to_thread(
222
229
  partial(genai.upload_file, name=name, mime_type=mime_type, display_name=display_name),
223
- sanitized_file
224
- )
230
+ file_path
231
+ )
232
+
233
+ # Clean up after successful upload
234
+ try:
235
+ os.remove(file_path)
236
+ os.rmdir(temp_dir)
237
+ except OSError as e:
238
+ log.warning(f"Cleanup error (non-critical): {str(e)}")
239
+
225
240
  return {"role": "user", "parts": [{"file_data": downloaded_content},
226
- {"text": f"You have been given the ability to read and work with filename '{display_name=}' with {mime_type=} {display_url=}"}
227
- ]}
228
- except Exception as err:
229
- msg = f"Could not upload {sanitized_file} to genai.upload_file: {str(err)} {traceback.format_exc()} {display_url=}"
230
- log.error(msg)
231
- return {"role": "user", "parts": [{"text": msg}]}
232
- else:
233
- try:
241
+ {"text": f"You have been given the ability to read and work with filename '{display_name}' with {mime_type=} {display_url=}"}
242
+ ]}
243
+ else:
234
244
  client = genaiv2.Client()
235
- downloaded_content = await asyncio.to_thread(
236
- client.files.upload,
237
- file=sanitized_file,
238
- config=dict(mime_type=mime_type, display_name=display_name)
239
- )
245
+
246
+ # Use semaphore to limit concurrent uploads
247
+ async with upload_semaphore:
248
+ downloaded_content = await asyncio.to_thread(
249
+ client.files.upload,
250
+ file=file_path,
251
+ config=dict(mime_type=mime_type, display_name=display_name)
252
+ )
253
+
254
+ # Clean up after successful upload
255
+ try:
256
+ os.remove(file_path)
257
+ os.rmdir(temp_dir)
258
+ except OSError as e:
259
+ log.warning(f"Cleanup error (non-critical): {str(e)}")
260
+
240
261
  return [downloaded_content,
241
- f"You have been given the ability to read and work with filename '{display_name=}' with {mime_type=} {display_url=}"]
262
+ f"You have been given the ability to read and work with filename '{display_name}' with {mime_type=} {display_url=}"]
242
263
 
243
- except Exception as err:
244
- msg = f"Could not upload {sanitized_file} to genaiv2.client.files.upload: {str(err)} {traceback.format_exc()} {display_url=}"
245
- log.error(msg)
246
- return {"role": "user", "parts": [{"text": msg}]}
264
+ except Exception as err:
265
+ # Clean up on error
266
+ try:
267
+ os.remove(file_path)
268
+ os.rmdir(temp_dir)
269
+ except OSError:
270
+ pass
271
+
272
+ msg = f"Could not upload {file_path} to {'genai.upload_file' if not genai_lib else 'genaiv2.client.files.upload'}: {str(err)} {traceback.format_exc()} {display_url=}"
273
+ log.error(msg)
274
+ return {"role": "user", "parts": [{"text": msg}]}
247
275
 
248
276
  except Exception as err:
249
277
  log.error(f"Error processing file {img_url} {mime_type=} on attempt {attempt + 1}/{retries}: {str(err)}")
@@ -255,3 +283,7 @@ async def download_gcs_upload_genai(img_url,
255
283
  else:
256
284
  raise err # Raise the error after max retries
257
285
 
286
+ # Add this at the module level
287
+ # Create a semaphore to limit concurrent uploads
288
+ upload_semaphore = asyncio.Semaphore(5) # Adjust the value based on your needs
289
+
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: sunholo
3
- Version: 0.119.16
3
+ Version: 0.120.0
4
4
  Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
5
5
  Author-email: Holosun ApS <multivac@sunholo.com>
6
6
  License: Apache License, Version 2.0
@@ -28,6 +28,7 @@ Provides-Extra: test
28
28
  Requires-Dist: pytest; extra == "test"
29
29
  Requires-Dist: pytest-cov; extra == "test"
30
30
  Provides-Extra: all
31
+ Requires-Dist: aiofiles; extra == "all"
31
32
  Requires-Dist: aiohttp; extra == "all"
32
33
  Requires-Dist: anthropic[vertex]; extra == "all"
33
34
  Requires-Dist: asyncpg; extra == "all"
@@ -50,7 +51,7 @@ Requires-Dist: google-cloud-pubsub; extra == "all"
50
51
  Requires-Dist: google-cloud-discoveryengine>=0.13.4; extra == "all"
51
52
  Requires-Dist: google-cloud-texttospeech; extra == "all"
52
53
  Requires-Dist: google-generativeai>=0.7.1; extra == "all"
53
- Requires-Dist: google-genai; extra == "all"
54
+ Requires-Dist: google-genai>=0.2.2; extra == "all"
54
55
  Requires-Dist: gunicorn; extra == "all"
55
56
  Requires-Dist: httpcore; extra == "all"
56
57
  Requires-Dist: httpx; extra == "all"
@@ -122,6 +123,7 @@ Requires-Dist: pytesseract; extra == "pipeline"
122
123
  Requires-Dist: tabulate; extra == "pipeline"
123
124
  Requires-Dist: unstructured[all-docs,local-inference]; extra == "pipeline"
124
125
  Provides-Extra: gcp
126
+ Requires-Dist: aiofiles; extra == "gcp"
125
127
  Requires-Dist: anthropic[vertex]; extra == "gcp"
126
128
  Requires-Dist: google-api-python-client; extra == "gcp"
127
129
  Requires-Dist: google-auth-httplib2; extra == "gcp"
@@ -136,7 +138,7 @@ Requires-Dist: google-cloud-logging; extra == "gcp"
136
138
  Requires-Dist: google-cloud-pubsub; extra == "gcp"
137
139
  Requires-Dist: google-cloud-discoveryengine>=0.13.4; extra == "gcp"
138
140
  Requires-Dist: google-cloud-texttospeech; extra == "gcp"
139
- Requires-Dist: google-genai; extra == "gcp"
141
+ Requires-Dist: google-genai>=0.2.2; extra == "gcp"
140
142
  Requires-Dist: google-generativeai>=0.8.3; extra == "gcp"
141
143
  Requires-Dist: langchain-google-genai>=2.0.0; extra == "gcp"
142
144
  Requires-Dist: langchain_google_alloydb_pg>=0.2.2; extra == "gcp"
@@ -20,7 +20,7 @@ sunholo/archive/__init__.py,sha256=qNHWm5rGPVOlxZBZCpA1wTYPbalizRT7f8X4rs2t290,3
20
20
  sunholo/archive/archive.py,sha256=PxVfDtO2_2ZEEbnhXSCbXLdeoHoQVImo4y3Jr2XkCFY,1204
21
21
  sunholo/auth/__init__.py,sha256=TeP-OY0XGxYV_8AQcVGoh35bvyWhNUcMRfhuD5l44Sk,91
22
22
  sunholo/auth/gcloud.py,sha256=PdbwkuTdRi4RKBmgG9uwsReegqC4VG15_tw5uzmA7Fs,298
23
- sunholo/auth/refresh.py,sha256=WSlKa8TQ70GlZ2e0u83nYknhUsgTeiyyTVi-EFOa8Uc,2029
23
+ sunholo/auth/refresh.py,sha256=cj2t337KbGfsqulrTLuhNszQcJ6PyRHzHvbD6qH-wUs,2118
24
24
  sunholo/auth/run.py,sha256=pMSp2lzL6e6ZqlltVUH92bkeUt341yMue027qrE0jQU,2821
25
25
  sunholo/azure/__init__.py,sha256=S1WQ5jndzNgzhSBh9UpX_yw7hRVm3hCzkAWNxUdK4dA,48
26
26
  sunholo/azure/auth.py,sha256=Y3fDqFLYwbsIyi5hS5L-3hYnwrLWVL96yPng5Sj5c2c,2236
@@ -75,7 +75,7 @@ sunholo/discovery_engine/__init__.py,sha256=hLgqRDJ22Aov9o2QjAEfsVgnL3kMdM-g5p8R
75
75
  sunholo/discovery_engine/chunker_handler.py,sha256=2775W5wHzdkYXqkRMlwh8MRbas20wcMnnnNngo0ljms,6160
76
76
  sunholo/discovery_engine/cli.py,sha256=KGVle5rkLL49oF9TQhrGI--8017IvvLOEoYur545Qb0,12790
77
77
  sunholo/discovery_engine/create_new.py,sha256=WUi4_xh_dFaGX3xA9jkNKZhaR6LCELjMPeRb0hyj4FU,1226
78
- sunholo/discovery_engine/discovery_engine_client.py,sha256=ZPe4MPHWJpq1MXKkEmyG26bIiwuRoSeomCPiNtduezM,36935
78
+ sunholo/discovery_engine/discovery_engine_client.py,sha256=NjIcP10I2-8yj6QZKrxGzNbh3SqQ5vGYsq9OwxCpWas,36935
79
79
  sunholo/discovery_engine/get_ai_search_chunks.py,sha256=I6Dt1CznqEvE7XIZ2PkLqopmjpO96iVEWJJqL5cJjOU,5554
80
80
  sunholo/embedder/__init__.py,sha256=sI4N_CqgEVcrMDxXgxKp1FsfsB4FpjoXgPGkl4N_u4I,44
81
81
  sunholo/embedder/embed_chunk.py,sha256=did2pKkWM2o0KkRcb0H9l2x_WjCq6OyuHDxGbITFKPM,6530
@@ -89,7 +89,7 @@ sunholo/gcs/download_url.py,sha256=Ul81n1rklr8WogPsuxWWD1Nr8RHU451LzHPMJNhAKzw,6
89
89
  sunholo/gcs/extract_and_sign.py,sha256=paRrTCvCN5vkQwCB7OSkxWi-pfOgOtZ0bwdXE08c3Ps,1546
90
90
  sunholo/gcs/metadata.py,sha256=oQLcXi4brsZ74aegWyC1JZmhlaEV270HS5_UWtAYYWE,898
91
91
  sunholo/genai/__init__.py,sha256=TV3PYHWoR4cChdmCOaYB0PtAEQ86qol9XYYEtb1JmSA,239
92
- sunholo/genai/file_handling.py,sha256=5wN8ynrrLLiY4JRRCCWLcqnxXVeYjgw26IH4itTrFCc,9564
92
+ sunholo/genai/file_handling.py,sha256=qHbSs2dFt_N5obL6btB8gCOhEioLrjoB76Tn8COTBLY,10820
93
93
  sunholo/genai/genaiv2.py,sha256=uqWcfvlsPVPyQo-W_cP9h2TTzyYfzj4lyJlyqPyKTkI,20269
94
94
  sunholo/genai/images.py,sha256=EyjsDqt6XQw99pZUQamomCpMOoIah9bp3XY94WPU7Ms,1678
95
95
  sunholo/genai/init.py,sha256=yG8E67TduFCTQPELo83OJuWfjwTnGZsyACospahyEaY,687
@@ -166,9 +166,9 @@ sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
166
166
  sunholo/vertex/memory_tools.py,sha256=tBZxqVZ4InTmdBvLlOYwoSEWu4-kGquc-gxDwZCC4FA,7667
167
167
  sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
168
168
  sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
169
- sunholo-0.119.16.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
170
- sunholo-0.119.16.dist-info/METADATA,sha256=5ZynkhkxJECDG9oZcwhMCI3bM-_odAsKR4zzizwg6v8,9715
171
- sunholo-0.119.16.dist-info/WHEEL,sha256=nn6H5-ilmfVryoAQl3ZQ2l8SH5imPWFpm1A5FgEuFV4,91
172
- sunholo-0.119.16.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
173
- sunholo-0.119.16.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
174
- sunholo-0.119.16.dist-info/RECORD,,
169
+ sunholo-0.120.0.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
170
+ sunholo-0.120.0.dist-info/METADATA,sha256=hDqIBciQ8Rq9Xq2ESCQErYMFFygdKvs_yZsmzcFZ24s,9808
171
+ sunholo-0.120.0.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
172
+ sunholo-0.120.0.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
173
+ sunholo-0.120.0.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
174
+ sunholo-0.120.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.8.1)
2
+ Generator: setuptools (75.8.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5