sunholo 0.96.7__py3-none-any.whl → 0.96.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sunholo/discovery_engine/__init__.py +1 -1
- sunholo/discovery_engine/discovery_engine_client.py +98 -2
- sunholo/discovery_engine/get_ai_search_chunks.py +49 -1
- {sunholo-0.96.7.dist-info → sunholo-0.96.9.dist-info}/METADATA +15 -2
- {sunholo-0.96.7.dist-info → sunholo-0.96.9.dist-info}/RECORD +9 -9
- {sunholo-0.96.7.dist-info → sunholo-0.96.9.dist-info}/LICENSE.txt +0 -0
- {sunholo-0.96.7.dist-info → sunholo-0.96.9.dist-info}/WHEEL +0 -0
- {sunholo-0.96.7.dist-info → sunholo-0.96.9.dist-info}/entry_points.txt +0 -0
- {sunholo-0.96.7.dist-info → sunholo-0.96.9.dist-info}/top_level.txt +0 -0
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
from .discovery_engine_client import DiscoveryEngineClient
|
|
2
|
-
from .get_ai_search_chunks import get_all_chunks
|
|
2
|
+
from .get_ai_search_chunks import get_all_chunks, async_get_all_chunks
|
|
@@ -69,8 +69,10 @@ class DiscoveryEngineClient:
|
|
|
69
69
|
self.store_client = discoveryengine.DataStoreServiceClient(client_options=client_options)
|
|
70
70
|
self.doc_client = discoveryengine.DocumentServiceClient(client_options=client_options)
|
|
71
71
|
self.search_client = discoveryengine.SearchServiceClient(client_options=client_options)
|
|
72
|
+
self.async_search_client = discoveryengine.SearchServiceAsyncClient(client_options=client_options)
|
|
72
73
|
self.engine_client = discoveryengine.EngineServiceClient(client_options=client_options)
|
|
73
74
|
|
|
75
|
+
|
|
74
76
|
@classmethod
|
|
75
77
|
def my_retry(cls):
|
|
76
78
|
return Retry(
|
|
@@ -221,6 +223,71 @@ class DiscoveryEngineClient:
|
|
|
221
223
|
log.info(f"Discovery engine request: {search_request=}")
|
|
222
224
|
search_response = self.search_client.search(search_request)
|
|
223
225
|
|
|
226
|
+
if parse_chunks_to_string:
|
|
227
|
+
|
|
228
|
+
big_string = self.process_chunks(search_response)
|
|
229
|
+
log.info(f"Discovery engine chunks string sample: {big_string[:100]}")
|
|
230
|
+
|
|
231
|
+
return big_string
|
|
232
|
+
|
|
233
|
+
log.info("Discovery engine response object")
|
|
234
|
+
return search_response
|
|
235
|
+
|
|
236
|
+
async def async_get_chunks(
|
|
237
|
+
self,
|
|
238
|
+
query: str,
|
|
239
|
+
num_previous_chunks: int = 3,
|
|
240
|
+
num_next_chunks: int = 3,
|
|
241
|
+
page_size: int = 10,
|
|
242
|
+
parse_chunks_to_string: bool = True,
|
|
243
|
+
serving_config: str = "default_serving_config",
|
|
244
|
+
):
|
|
245
|
+
"""Retrieves chunks or documents based on a query.
|
|
246
|
+
|
|
247
|
+
Args:
|
|
248
|
+
query (str): The search query.
|
|
249
|
+
collection_id (str): The ID of the collection to search.
|
|
250
|
+
num_previous_chunks (int, optional): Number of previous chunks to return for context (default is 3).
|
|
251
|
+
num_next_chunks (int, optional): Number of next chunks to return for context (default is 3).
|
|
252
|
+
page_size (int, optional): The maximum number of results to return per page (default is 10).
|
|
253
|
+
parse_chunks_to_string: If True will put chunks in one big string, False will return object
|
|
254
|
+
serving_config: The resource name of the Search serving config
|
|
255
|
+
|
|
256
|
+
Returns:
|
|
257
|
+
discoveryengine.SearchResponse: The search response object containing the search results.
|
|
258
|
+
|
|
259
|
+
Example:
|
|
260
|
+
```python
|
|
261
|
+
search_response = client.get_chunks('your query', 'your_collection_id')
|
|
262
|
+
for result in search_response.results:
|
|
263
|
+
for chunk in result.document.chunks:
|
|
264
|
+
print(f"Chunk: {chunk.snippet}, document name: {chunk.document_name}")
|
|
265
|
+
```
|
|
266
|
+
"""
|
|
267
|
+
|
|
268
|
+
serving_config_path = self.async_search_client.serving_config_path(
|
|
269
|
+
self.project_id,
|
|
270
|
+
self.location,
|
|
271
|
+
self.data_store_id,
|
|
272
|
+
serving_config
|
|
273
|
+
)
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
search_request = discoveryengine.SearchRequest(
|
|
277
|
+
serving_config=serving_config_path,
|
|
278
|
+
query=query,
|
|
279
|
+
page_size=page_size,
|
|
280
|
+
content_search_spec=discoveryengine.SearchRequest.ContentSearchSpec(
|
|
281
|
+
search_result_mode="CHUNKS",
|
|
282
|
+
chunk_spec=discoveryengine.SearchRequest.ContentSearchSpec.ChunkSpec(
|
|
283
|
+
num_previous_chunks=num_previous_chunks,
|
|
284
|
+
num_next_chunks=num_next_chunks,
|
|
285
|
+
),
|
|
286
|
+
),
|
|
287
|
+
)
|
|
288
|
+
|
|
289
|
+
log.info(f"Discovery engine async request: {search_request=}")
|
|
290
|
+
search_response = self.async_search_client.search(search_request)
|
|
224
291
|
|
|
225
292
|
if parse_chunks_to_string:
|
|
226
293
|
|
|
@@ -232,7 +299,7 @@ class DiscoveryEngineClient:
|
|
|
232
299
|
log.info("Discovery engine response object")
|
|
233
300
|
return search_response
|
|
234
301
|
|
|
235
|
-
def chunk_format(self, chunk
|
|
302
|
+
def chunk_format(self, chunk):
|
|
236
303
|
return (
|
|
237
304
|
f"# {chunk.id}\n"
|
|
238
305
|
f"{chunk.content}\n"
|
|
@@ -241,7 +308,7 @@ class DiscoveryEngineClient:
|
|
|
241
308
|
f"Document Title: {chunk.document_metadata.title}\n"
|
|
242
309
|
)
|
|
243
310
|
|
|
244
|
-
def process_chunks(self, response
|
|
311
|
+
def process_chunks(self, response):
|
|
245
312
|
all_chunks = []
|
|
246
313
|
|
|
247
314
|
# Check if the response contains results
|
|
@@ -269,6 +336,35 @@ class DiscoveryEngineClient:
|
|
|
269
336
|
result_string = "\n".join(all_chunks)
|
|
270
337
|
|
|
271
338
|
return result_string
|
|
339
|
+
|
|
340
|
+
async def async_process_chunks(self, response):
    """Format every chunk of a search response into one newline-joined string.

    For each result the chunk's previous neighbours, the chunk itself and
    its next neighbours are rendered (in that order) with
    ``self.chunk_format`` and the rendered pieces are joined with ``"\\n"``.

    Args:
        response: A search response exposing ``results``. It may be an
            asynchronous pager (anything implementing ``__aiter__``), in
            which case it is consumed with ``async for``, or a plain
            response whose ``results`` is an ordinary iterable.

    Returns:
        str: All formatted chunks joined by newlines.

    Raises:
        ValueError: If ``response`` has no ``results`` attribute or the
            results are empty.
    """
    # Check if the response contains results
    if not hasattr(response, 'results') or not response.results:
        raise ValueError(f'No results found in response: {response=}')

    all_chunks = []

    def _collect(result):
        # Append previous neighbours, the chunk itself, then next neighbours.
        chunk = result.chunk
        # NOTE(review): the Discovery Engine ``Chunk`` message appears to
        # expose its neighbours on the ``chunk_metadata`` field, whereas
        # ``ChunkMetadata`` is the nested type - confirm against the client
        # library. The field is preferred; the original attribute is kept
        # as a fallback so objects that only provide it still work.
        chunk_metadata = (
            getattr(chunk, 'chunk_metadata', None)
            or getattr(chunk, 'ChunkMetadata', None)
        )

        # Process previous chunks
        for prev_chunk in getattr(chunk_metadata, 'previous_chunks', None) or ():
            all_chunks.append(self.chunk_format(prev_chunk))

        all_chunks.append(self.chunk_format(chunk))

        # Process next chunks
        for next_chunk in getattr(chunk_metadata, 'next_chunks', None) or ():
            all_chunks.append(self.chunk_format(next_chunk))

    # An async pager must be iterated itself: its ``results`` attribute is a
    # plain sequence and cannot be used with ``async for``.
    source = response if hasattr(response, '__aiter__') else response.results
    if hasattr(source, '__aiter__'):
        async for result in source:
            _collect(result)
    else:
        for result in source:
            _collect(result)

    # Combine all chunks into one long string
    return "\n".join(all_chunks)
|
|
272
368
|
|
|
273
369
|
def create_engine(self,
|
|
274
370
|
engine_id: str,
|
|
@@ -29,7 +29,9 @@ def get_all_chunks(question:str, config:ConfigManager):
|
|
|
29
29
|
new_vector_name = value.get('vector_name')
|
|
30
30
|
if not new_vector_name:
|
|
31
31
|
log.warning("read_only specified but no new vector_name to read from")
|
|
32
|
-
|
|
32
|
+
continue
|
|
33
|
+
else:
|
|
34
|
+
vector_name = new_vector_name
|
|
33
35
|
|
|
34
36
|
num_chunks = value.get('num_chunks') or 3
|
|
35
37
|
|
|
@@ -51,3 +53,49 @@ def get_chunks(question, vector_name, num_chunks):
|
|
|
51
53
|
|
|
52
54
|
|
|
53
55
|
|
|
56
|
+
async def async_get_all_chunks(question:str, config:ConfigManager):
    """
    Look through a config memory key and find all Vertex AI Search retrievers, call them and return a joined string of chunks

    Every memory entry whose ``vectorstore`` is ``"discovery_engine"`` or
    ``"vertex_ai_search"`` is queried in turn via ``async_get_chunks``.

    args: question - question to search similarity for
          config: A ConfigManager object

    returns: a big string of chunks joined by newlines, or None when no
             memories are configured or no retriever returned anything
    """
    memories = load_memories(config=config)
    if not memories:
        return None

    chunks = []
    for memory in memories:
        for key, value in memory.items():  # each memory is a dict of name -> settings
            log.info(f"Found memory {key}")
            if value.get('vectorstore') not in ("discovery_engine", "vertex_ai_search"):
                continue

            # Resolve the target store for THIS memory only, so that a
            # read_only override never leaks into the memories that follow.
            vector_name = config.vector_name
            if value.get('read_only'):
                vector_name = value.get('vector_name')
                if not vector_name:
                    log.warning("read_only specified but no new vector_name to read from")
                    continue

            num_chunks = value.get('num_chunks') or 3

            chunk = await async_get_chunks(question, vector_name, num_chunks)
            if chunk:
                chunks.append(chunk)

    if chunks:
        return "\n".join(chunks)

    log.warning(f"No chunks found for {config.vector_name}")
    return None
|
|
95
|
+
|
|
96
|
+
async def async_get_chunks(question, vector_name, num_chunks):
    """Fetch chunks for ``question`` from one Discovery Engine data store.

    Builds a ``DiscoveryEngineClient`` for ``vector_name`` and awaits its
    ``async_get_chunks`` method, requesting ``num_chunks`` chunks of context
    on each side of every hit.

    Args:
        question: The search query.
        vector_name: Identifier passed to ``DiscoveryEngineClient``.
        num_chunks: Used for both ``num_previous_chunks`` and
            ``num_next_chunks``.

    Returns:
        Whatever ``DiscoveryEngineClient.async_get_chunks`` returns, or
        ``None`` if the search raised (the error is logged, not re-raised).
        Errors raised while constructing the client are not caught here.
    """
    de = DiscoveryEngineClient(vector_name, project_id=get_gcp_project(include_config=True))
    try:
        return await de.async_get_chunks(question, num_previous_chunks=num_chunks, num_next_chunks=num_chunks)
    except Exception as err:
        # Best effort: a failing retriever is reported and treated as "no chunks".
        log.error(f"No discovery engine chunks found: {str(err)}")
        return None
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: sunholo
|
|
3
|
-
Version: 0.96.
|
|
3
|
+
Version: 0.96.9
|
|
4
4
|
Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
|
|
5
5
|
Home-page: https://github.com/sunholo-data/sunholo-py
|
|
6
|
-
Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.96.
|
|
6
|
+
Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.96.9.tar.gz
|
|
7
7
|
Author: Holosun ApS
|
|
8
8
|
Author-email: multivac@sunholo.com
|
|
9
9
|
License: Apache License, Version 2.0
|
|
@@ -148,6 +148,19 @@ This is the Sunholo Python project, a comprehensive toolkit for working with lan
|
|
|
148
148
|
|
|
149
149
|
Please refer to the website for full documentation at https://dev.sunholo.com/
|
|
150
150
|
|
|
151
|
+
## Listen to the audio file:
|
|
152
|
+
|
|
153
|
+
A [NotebookLM](https://notebooklm.google/) generated podcast of the codebase that may help give you an overview of what the library is capable of:
|
|
154
|
+
|
|
155
|
+
<audio controls>
|
|
156
|
+
<source src="https://drive.google.com/uc?export=download&id=1GvwRmiYDjPjN2hXQ8plhnVDByu6TmgCQ" type="audio/wav">
|
|
157
|
+
Your browser does not support the audio element.
|
|
158
|
+
</audio>
|
|
159
|
+
|
|
160
|
+
[Alternatively, Download the audio file from Google Drive](https://drive.google.com/uc?export=download&id=1GvwRmiYDjPjN2hXQ8plhnVDByu6TmgCQ)
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
|
|
151
164
|
## Tests via pytest
|
|
152
165
|
|
|
153
166
|
If loading from GitHub, run tests:
|
|
@@ -70,11 +70,11 @@ sunholo/database/sql/sb/create_table.sql,sha256=SbcOrf5tUiVKGUohu1lau7IsbDRbTFbr
|
|
|
70
70
|
sunholo/database/sql/sb/delete_source_row.sql,sha256=r6fEuUKdbiLHCDGKSbKINDCpJjsmfHZNNOo1ptwLLSo,75
|
|
71
71
|
sunholo/database/sql/sb/return_sources.sql,sha256=89KAnxfK8n_qGK9jy1OQT8f9n4uYUtYL5cCxbC2mj_c,255
|
|
72
72
|
sunholo/database/sql/sb/setup.sql,sha256=CvoFvZQev2uWjmFa3aj3m3iuPFzAAJZ0S7Qi3L3-zZI,89
|
|
73
|
-
sunholo/discovery_engine/__init__.py,sha256=
|
|
73
|
+
sunholo/discovery_engine/__init__.py,sha256=hLgqRDJ22Aov9o2QjAEfsVgnL3kMdM-g5p8RJ9OyKdQ,130
|
|
74
74
|
sunholo/discovery_engine/chunker_handler.py,sha256=Fv4BLOBi_7ap3AiAy4TlTN48CLZSMurJ3TkvC75Euro,5123
|
|
75
75
|
sunholo/discovery_engine/create_new.py,sha256=NzhSh6nG6nQ5J9gZh8IDph4JiEVT_DC5GGvP0GuwTWs,943
|
|
76
|
-
sunholo/discovery_engine/discovery_engine_client.py,sha256=
|
|
77
|
-
sunholo/discovery_engine/get_ai_search_chunks.py,sha256=
|
|
76
|
+
sunholo/discovery_engine/discovery_engine_client.py,sha256=FjcKCIeLz40Xn8DqwHZuHCYp2-oOFHw-doy1v-ULnEk,21536
|
|
77
|
+
sunholo/discovery_engine/get_ai_search_chunks.py,sha256=6SO6v_4AcrUat0bP7wqC8xg9aY916Fnw_aZsogrLx-g,3877
|
|
78
78
|
sunholo/embedder/__init__.py,sha256=sI4N_CqgEVcrMDxXgxKp1FsfsB4FpjoXgPGkl4N_u4I,44
|
|
79
79
|
sunholo/embedder/embed_chunk.py,sha256=MCbTePWjUbIRVDFFhHJ94BvOZvIom62-mTr0PmfQyt0,6951
|
|
80
80
|
sunholo/excel/__init__.py,sha256=AqTMN9K4qJYi4maEgoORc5oxDVGO_eqmwzDaVP37JgY,56
|
|
@@ -144,9 +144,9 @@ sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
|
|
|
144
144
|
sunholo/vertex/memory_tools.py,sha256=tBZxqVZ4InTmdBvLlOYwoSEWu4-kGquc-gxDwZCC4FA,7667
|
|
145
145
|
sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
|
|
146
146
|
sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
|
|
147
|
-
sunholo-0.96.
|
|
148
|
-
sunholo-0.96.
|
|
149
|
-
sunholo-0.96.
|
|
150
|
-
sunholo-0.96.
|
|
151
|
-
sunholo-0.96.
|
|
152
|
-
sunholo-0.96.
|
|
147
|
+
sunholo-0.96.9.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
|
|
148
|
+
sunholo-0.96.9.dist-info/METADATA,sha256=7-B4Hs2tNbUkmGbDYWPYfJJCWDpe6WtoLMxbok8zH4A,8404
|
|
149
|
+
sunholo-0.96.9.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
|
|
150
|
+
sunholo-0.96.9.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
|
|
151
|
+
sunholo-0.96.9.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
|
|
152
|
+
sunholo-0.96.9.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|