sunholo 0.96.6__py3-none-any.whl → 0.96.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -69,8 +69,10 @@ class DiscoveryEngineClient:
69
69
  self.store_client = discoveryengine.DataStoreServiceClient(client_options=client_options)
70
70
  self.doc_client = discoveryengine.DocumentServiceClient(client_options=client_options)
71
71
  self.search_client = discoveryengine.SearchServiceClient(client_options=client_options)
72
+ self.async_search_client = discoveryengine.SearchServiceAsyncClient(client_options=client_options)
72
73
  self.engine_client = discoveryengine.EngineServiceClient(client_options=client_options)
73
74
 
75
+
74
76
  @classmethod
75
77
  def my_retry(cls):
76
78
  return Retry(
@@ -221,6 +223,71 @@ class DiscoveryEngineClient:
221
223
  log.info(f"Discovery engine request: {search_request=}")
222
224
  search_response = self.search_client.search(search_request)
223
225
 
226
+ if parse_chunks_to_string:
227
+
228
+ big_string = self.process_chunks(search_response)
229
+ log.info(f"Discovery engine chunks string sample: {big_string[:100]}")
230
+
231
+ return big_string
232
+
233
+ log.info("Discovery engine response object")
234
+ return search_response
235
+
236
+ async def async_get_chunks(
237
+ self,
238
+ query: str,
239
+ num_previous_chunks: int = 3,
240
+ num_next_chunks: int = 3,
241
+ page_size: int = 10,
242
+ parse_chunks_to_string: bool = True,
243
+ serving_config: str = "default_serving_config",
244
+ ):
245
+ """Retrieves chunks or documents based on a query.
246
+
247
+ Args:
248
+ query (str): The search query.
249
+ collection_id (str): The ID of the collection to search.
250
+ num_previous_chunks (int, optional): Number of previous chunks to return for context (default is 3).
251
+ num_next_chunks (int, optional): Number of next chunks to return for context (default is 3).
252
+ page_size (int, optional): The maximum number of results to return per page (default is 10).
253
+ parse_chunks_to_string: If True will put chunks in one big string, False will return object
254
+ serving_config: The resource name of the Search serving config
255
+
256
+ Returns:
257
+ discoveryengine.SearchResponse: The search response object containing the search results.
258
+
259
+ Example:
260
+ ```python
261
+ search_response = client.get_chunks('your query', 'your_collection_id')
262
+ for result in search_response.results:
263
+ for chunk in result.document.chunks:
264
+ print(f"Chunk: {chunk.snippet}, document name: {chunk.document_name}")
265
+ ```
266
+ """
267
+
268
+ serving_config_path = self.async_search_client.serving_config_path(
269
+ self.project_id,
270
+ self.location,
271
+ self.data_store_id,
272
+ serving_config
273
+ )
274
+
275
+
276
+ search_request = discoveryengine.SearchRequest(
277
+ serving_config=serving_config_path,
278
+ query=query,
279
+ page_size=page_size,
280
+ content_search_spec=discoveryengine.SearchRequest.ContentSearchSpec(
281
+ search_result_mode="CHUNKS",
282
+ chunk_spec=discoveryengine.SearchRequest.ContentSearchSpec.ChunkSpec(
283
+ num_previous_chunks=num_previous_chunks,
284
+ num_next_chunks=num_next_chunks,
285
+ ),
286
+ ),
287
+ )
288
+
289
+ log.info(f"Discovery engine async request: {search_request=}")
290
+ search_response = self.async_search_client.search(search_request)
224
291
 
225
292
  if parse_chunks_to_string:
226
293
 
@@ -232,7 +299,7 @@ class DiscoveryEngineClient:
232
299
  log.info("Discovery engine response object")
233
300
  return search_response
234
301
 
235
- def chunk_format(self, chunk: Chunk):
302
+ def chunk_format(self, chunk):
236
303
  return (
237
304
  f"# {chunk.id}\n"
238
305
  f"{chunk.content}\n"
@@ -241,7 +308,7 @@ class DiscoveryEngineClient:
241
308
  f"Document Title: {chunk.document_metadata.title}\n"
242
309
  )
243
310
 
244
- def process_chunks(self, response: SearchResponse):
311
+ def process_chunks(self, response):
245
312
  all_chunks = []
246
313
 
247
314
  # Check if the response contains results
@@ -269,6 +336,35 @@ class DiscoveryEngineClient:
269
336
  result_string = "\n".join(all_chunks)
270
337
 
271
338
  return result_string
339
+
340
async def async_process_chunks(self, response):
    """Flatten the chunks of a search response into one newline-joined string.

    For every result, the chunks listed under its chunk metadata as
    ``previous_chunks`` are emitted first, then the result's own chunk,
    then its ``next_chunks``; each one is rendered with ``self.chunk_format``.

    Args:
        response: Search result container exposing a ``results`` attribute.
            It may be async-iterable (presumably the pager obtained by
            awaiting the async search client - confirm against the caller)
            or a plain response whose ``results`` is an ordinary sequence.

    Returns:
        str: All formatted chunks joined with ``"\\n"``.

    Raises:
        ValueError: If ``response`` has no ``results`` or they are empty.
    """
    # Check if the response contains results
    if not hasattr(response, 'results') or not response.results:
        raise ValueError(f'No results found in response: {response=}')

    all_chunks = []

    def _collect(result):
        # Gather the surrounding context and the hit itself, in reading order.
        chunk = result.chunk
        # The neighbouring chunks live on the *instance* field
        # ``chunk_metadata``; ``chunk.ChunkMetadata`` is the nested message
        # class and carries no per-result data.
        chunk_metadata = getattr(chunk, 'chunk_metadata', None)

        for prev_chunk in getattr(chunk_metadata, 'previous_chunks', None) or []:
            all_chunks.append(self.chunk_format(prev_chunk))

        all_chunks.append(self.chunk_format(chunk))

        for next_chunk in getattr(chunk_metadata, 'next_chunks', None) or []:
            all_chunks.append(self.chunk_format(next_chunk))

    if hasattr(response, '__aiter__'):
        # ``async for`` needs an object implementing ``__aiter__``; that is
        # the response/pager itself, not its ``results`` field.
        async for result in response:
            _collect(result)
    else:
        # Plain (non-async) response: ``results`` is a regular iterable.
        for result in response.results:
            _collect(result)

    # Combine all chunks into one long string
    return "\n".join(all_chunks)
272
368
 
273
369
  def create_engine(self,
274
370
  engine_id: str,
@@ -29,7 +29,9 @@ def get_all_chunks(question:str, config:ConfigManager):
29
29
  new_vector_name = value.get('vector_name')
30
30
  if not new_vector_name:
31
31
  log.warning("read_only specified but no new vector_name to read from")
32
- vector_name = new_vector_name
32
+ continue
33
+ else:
34
+ vector_name = new_vector_name
33
35
 
34
36
  num_chunks = value.get('num_chunks') or 3
35
37
 
@@ -51,3 +53,49 @@ def get_chunks(question, vector_name, num_chunks):
51
53
 
52
54
 
53
55
 
56
async def async_get_all_chunks(question:str, config:ConfigManager):
    """
    Look through the config's memory entries, query every Vertex AI Search /
    Discovery Engine retriever found there and return the chunks joined
    into one string.

    args: question - question to search similarity for
          config: A ConfigManager object

    returns: a big string of chunks joined with newlines, or None when there
             are no memories or no retriever returned any chunks
    """
    memories = load_memories(config=config)
    if not memories:
        return None

    chunks = []
    default_vector_name = config.vector_name
    # Tracks the most recently targeted store so the final warning can name it.
    vector_name = default_vector_name

    for memory in memories:
        for key, value in memory.items():
            log.info(f"Found memory {key}")
            if value.get('vectorstore') not in ("discovery_engine", "vertex_ai_search"):
                continue

            # Start each memory from the configured name so a read_only
            # override on an earlier entry does not leak into later ones.
            vector_name = default_vector_name
            if value.get('read_only'):
                new_vector_name = value.get('vector_name')
                if not new_vector_name:
                    log.warning("read_only specified but no new vector_name to read from")
                    continue
                vector_name = new_vector_name

            # A missing or zero num_chunks falls back to 3.
            num_chunks = value.get('num_chunks') or 3

            chunk = await async_get_chunks(question, vector_name, num_chunks)
            if chunk:
                chunks.append(chunk)

    if chunks:
        return "\n".join(chunks)

    log.warning(f"No chunks found for {vector_name}")
    return None
95
+
96
async def async_get_chunks(question, vector_name, num_chunks):
    """Query one Discovery Engine store for chunks matching ``question``.

    Builds a ``DiscoveryEngineClient`` for ``vector_name`` in the project
    returned by ``get_gcp_project(include_config=True)`` and awaits its
    ``async_get_chunks`` method, using ``num_chunks`` as both the number of
    previous and the number of next context chunks.

    Returns whatever the client returns; if the lookup raises, the error is
    logged and ``None`` is returned instead (best-effort retrieval).
    """
    project = get_gcp_project(include_config=True)
    client = DiscoveryEngineClient(vector_name, project_id=project)
    try:
        found = await client.async_get_chunks(
            question,
            num_previous_chunks=num_chunks,
            num_next_chunks=num_chunks,
        )
    except Exception as err:
        log.error(f"No discovery engine chunks found: {str(err)}")
        return None
    return found
@@ -186,7 +186,6 @@ def print_grounding_response(response):
186
186
  if not context:
187
187
  print(f"Skipping Grounding Chunk {grounding_chunk}")
188
188
  continue
189
- markdown_text += "### Grounding Chunks\n"
190
189
 
191
190
  markdown_text += f"{index}. [{context.title}]({context.uri})\n"
192
191
 
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sunholo
3
- Version: 0.96.6
3
+ Version: 0.96.8
4
4
  Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
5
5
  Home-page: https://github.com/sunholo-data/sunholo-py
6
- Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.96.6.tar.gz
6
+ Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.96.8.tar.gz
7
7
  Author: Holosun ApS
8
8
  Author-email: multivac@sunholo.com
9
9
  License: Apache License, Version 2.0
@@ -73,8 +73,8 @@ sunholo/database/sql/sb/setup.sql,sha256=CvoFvZQev2uWjmFa3aj3m3iuPFzAAJZ0S7Qi3L3
73
73
  sunholo/discovery_engine/__init__.py,sha256=P00bB8aVVWefOZbCQvzHsVMuP_sd-_d_4o5xCuCpN3g,108
74
74
  sunholo/discovery_engine/chunker_handler.py,sha256=Fv4BLOBi_7ap3AiAy4TlTN48CLZSMurJ3TkvC75Euro,5123
75
75
  sunholo/discovery_engine/create_new.py,sha256=NzhSh6nG6nQ5J9gZh8IDph4JiEVT_DC5GGvP0GuwTWs,943
76
- sunholo/discovery_engine/discovery_engine_client.py,sha256=oORB2SVVqrYrz7E3srPrknyuR6Dl3SJJwaVrbVXJER4,17726
77
- sunholo/discovery_engine/get_ai_search_chunks.py,sha256=VPzdYoBP_E6Bko0KpX656QiIfJdwmje4sBnPtZs4JQ4,1963
76
+ sunholo/discovery_engine/discovery_engine_client.py,sha256=FjcKCIeLz40Xn8DqwHZuHCYp2-oOFHw-doy1v-ULnEk,21536
77
+ sunholo/discovery_engine/get_ai_search_chunks.py,sha256=6SO6v_4AcrUat0bP7wqC8xg9aY916Fnw_aZsogrLx-g,3877
78
78
  sunholo/embedder/__init__.py,sha256=sI4N_CqgEVcrMDxXgxKp1FsfsB4FpjoXgPGkl4N_u4I,44
79
79
  sunholo/embedder/embed_chunk.py,sha256=MCbTePWjUbIRVDFFhHJ94BvOZvIom62-mTr0PmfQyt0,6951
80
80
  sunholo/excel/__init__.py,sha256=AqTMN9K4qJYi4maEgoORc5oxDVGO_eqmwzDaVP37JgY,56
@@ -141,12 +141,12 @@ sunholo/vertex/extensions_call.py,sha256=QeQbL3aAHlc4_-SynOzooZ_3xkQWAlcgNmFBSwL
141
141
  sunholo/vertex/extensions_class.py,sha256=2QGW28lNjoMEnaoVb3QcqEDwphclIsZthnpLUi5_Ivo,21033
142
142
  sunholo/vertex/genai_functions.py,sha256=2z6grM9H0Z79Yzx88l8mE1wXck3bRa0TWvnqZZ9ifDc,2051
143
143
  sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
144
- sunholo/vertex/memory_tools.py,sha256=ZirFbS7EKxQaoRmOrb4BnG6jPJ83wt43N8M4zTVbutU,7717
144
+ sunholo/vertex/memory_tools.py,sha256=tBZxqVZ4InTmdBvLlOYwoSEWu4-kGquc-gxDwZCC4FA,7667
145
145
  sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
146
146
  sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
147
- sunholo-0.96.6.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
148
- sunholo-0.96.6.dist-info/METADATA,sha256=QPP47osAKjNWQQDRtLo39EpL5itPCK6xf3B05XLhKEE,7889
149
- sunholo-0.96.6.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
150
- sunholo-0.96.6.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
151
- sunholo-0.96.6.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
152
- sunholo-0.96.6.dist-info/RECORD,,
147
+ sunholo-0.96.8.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
148
+ sunholo-0.96.8.dist-info/METADATA,sha256=KRXAVqOiJVX5B7yEYYJE47o6IElXxmjkQx5JsdJAQ2g,7889
149
+ sunholo-0.96.8.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
150
+ sunholo-0.96.8.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
151
+ sunholo-0.96.8.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
152
+ sunholo-0.96.8.dist-info/RECORD,,