sunholo 0.69.8__py3-none-any.whl → 0.69.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sunholo/discovery_engine/chunker_handler.py +2 -7
- sunholo/discovery_engine/create_new.py +2 -3
- sunholo/discovery_engine/discovery_engine_client.py +55 -0
- sunholo/llamaindex/import_files.py +0 -1
- {sunholo-0.69.8.dist-info → sunholo-0.69.10.dist-info}/METADATA +2 -2
- {sunholo-0.69.8.dist-info → sunholo-0.69.10.dist-info}/RECORD +10 -10
- {sunholo-0.69.8.dist-info → sunholo-0.69.10.dist-info}/LICENSE.txt +0 -0
- {sunholo-0.69.8.dist-info → sunholo-0.69.10.dist-info}/WHEEL +0 -0
- {sunholo-0.69.8.dist-info → sunholo-0.69.10.dist-info}/entry_points.txt +0 -0
- {sunholo-0.69.8.dist-info → sunholo-0.69.10.dist-info}/top_level.txt +0 -0
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from ..logging import log
|
|
2
2
|
from ..utils.config import load_config_key
|
|
3
|
+
from ..utils.gcp_project import get_gcp_project
|
|
3
4
|
from ..components import load_memories
|
|
4
5
|
|
|
5
6
|
from .discovery_engine_client import DiscoveryEngineClient
|
|
@@ -20,11 +21,6 @@ def do_discovery_engine(message_data, metadata, vector_name):
|
|
|
20
21
|
```
|
|
21
22
|
"""
|
|
22
23
|
|
|
23
|
-
global_gcp_config = load_config_key("gcp_config", vector_name="global", kind="vacConfig")
|
|
24
|
-
gcp_config = load_config_key("gcp_config", vector_name=vector_name, kind="vacConfig")
|
|
25
|
-
if not gcp_config and not global_gcp_config:
|
|
26
|
-
raise ValueError(f"Need config.{vector_name}.gcp_config to configure discovery engine")
|
|
27
|
-
|
|
28
24
|
memories = load_memories(vector_name)
|
|
29
25
|
tools = []
|
|
30
26
|
|
|
@@ -38,11 +34,10 @@ def do_discovery_engine(message_data, metadata, vector_name):
|
|
|
38
34
|
vectorstore = value.get('vectorstore')
|
|
39
35
|
if vectorstore == "discovery_engine" or vectorstore == "vertex_ai_search":
|
|
40
36
|
log.info(f"Found vectorstore {vectorstore}")
|
|
41
|
-
project_id = gcp_config.get('project_id') or global_gcp_config['project_id']
|
|
42
37
|
#location = gcp_config.get('location')
|
|
43
38
|
corpus = DiscoveryEngineClient(
|
|
44
39
|
data_store_id=vector_name,
|
|
45
|
-
project_id=
|
|
40
|
+
project_id=get_gcp_project(),
|
|
46
41
|
# location needs to be 'eu' or 'us' which doesn't work with other configurations
|
|
47
42
|
#location=location or global_location
|
|
48
43
|
)
|
|
@@ -1,9 +1,8 @@
|
|
|
1
1
|
from .discovery_engine_client import DiscoveryEngineClient
|
|
2
2
|
from ..utils.config import load_config_key
|
|
3
|
+
from ..utils.gcp_project import get_gcp_project
|
|
3
4
|
|
|
4
5
|
def create_new_discovery_engine(vector_name):
|
|
5
|
-
global_config = load_config_key("gcp_config", "global", kind="vacConfig")
|
|
6
|
-
gcp_config = load_config_key("gcp_config", vector_name=vector_name, kind="vacConfig")
|
|
7
6
|
|
|
8
7
|
chunker_config = load_config_key("chunker", vector_name=vector_name, kind="vacConfig")
|
|
9
8
|
|
|
@@ -12,7 +11,7 @@ def create_new_discovery_engine(vector_name):
|
|
|
12
11
|
if "chunk_size" in chunker_config:
|
|
13
12
|
chunk_size = chunker_config["chunk_size"]
|
|
14
13
|
|
|
15
|
-
project_id =
|
|
14
|
+
project_id = get_gcp_project()
|
|
16
15
|
if not project_id:
|
|
17
16
|
raise ValueError("Could not find project_id in gcp_config")
|
|
18
17
|
|
|
@@ -155,6 +155,7 @@ class DiscoveryEngineClient:
|
|
|
155
155
|
num_previous_chunks: int = 3,
|
|
156
156
|
num_next_chunks: int = 3,
|
|
157
157
|
page_size: int = 10,
|
|
158
|
+
parse_chunks_to_string: bool = True,
|
|
158
159
|
doc_or_chunks: str = "CHUNKS", # or DOCUMENTS
|
|
159
160
|
serving_config: str = "default_serving_config",
|
|
160
161
|
):
|
|
@@ -166,6 +167,7 @@ class DiscoveryEngineClient:
|
|
|
166
167
|
num_previous_chunks (int, optional): Number of previous chunks to return for context (default is 3).
|
|
167
168
|
num_next_chunks (int, optional): Number of next chunks to return for context (default is 3).
|
|
168
169
|
page_size (int, optional): The maximum number of results to return per page (default is 10).
|
|
170
|
+
parse_chunks_to_string: If True will put chunks in one big string, False will return object
|
|
169
171
|
|
|
170
172
|
Returns:
|
|
171
173
|
discoveryengine.SearchResponse: The search response object containing the search results.
|
|
@@ -201,8 +203,61 @@ class DiscoveryEngineClient:
|
|
|
201
203
|
|
|
202
204
|
search_response = self.search_client.search(search_request)
|
|
203
205
|
|
|
206
|
+
if parse_chunks_to_string:
|
|
207
|
+
|
|
208
|
+
return self.process_chunks(search_response)
|
|
209
|
+
|
|
204
210
|
return search_response
|
|
205
211
|
|
|
212
|
+
def process_chunks(self, response):
|
|
213
|
+
all_chunks = []
|
|
214
|
+
|
|
215
|
+
if 'results' not in response:
|
|
216
|
+
raise ValueError('No results found in response')
|
|
217
|
+
|
|
218
|
+
for result in response['results']:
|
|
219
|
+
chunk = result['chunk']
|
|
220
|
+
chunk_metadata = chunk['chunkMetadata']
|
|
221
|
+
|
|
222
|
+
if 'previousChunks' in chunk_metadata:
|
|
223
|
+
# Process previous chunks
|
|
224
|
+
for prev_chunk in chunk['chunkMetadata']['previousChunks']:
|
|
225
|
+
prev_chunk_string = (
|
|
226
|
+
f"# {prev_chunk['id']}\n"
|
|
227
|
+
f"{prev_chunk['content']}\n"
|
|
228
|
+
f"## metadata\n"
|
|
229
|
+
f"Document URI: {prev_chunk['documentMetadata']['uri']}\n"
|
|
230
|
+
f"Document Title: {prev_chunk['documentMetadata']['title']}\n"
|
|
231
|
+
)
|
|
232
|
+
all_chunks.append(prev_chunk_string)
|
|
233
|
+
|
|
234
|
+
# Process fetched chunk
|
|
235
|
+
fetched_chunk_string = (
|
|
236
|
+
f"# {chunk['id']}\n"
|
|
237
|
+
f"{chunk['content']}\n"
|
|
238
|
+
f"## metadata\n"
|
|
239
|
+
f"Document URI: {chunk['documentMetadata']['uri']}\n"
|
|
240
|
+
f"Document Title: {chunk['documentMetadata']['title']}\n"
|
|
241
|
+
)
|
|
242
|
+
all_chunks.append(fetched_chunk_string)
|
|
243
|
+
|
|
244
|
+
# Process next chunks
|
|
245
|
+
if 'nextChunks' in chunk_metadata:
|
|
246
|
+
for next_chunk in chunk_metadata['nextChunks']:
|
|
247
|
+
next_chunk_string = (
|
|
248
|
+
f"# {next_chunk['id']}\n"
|
|
249
|
+
f"{next_chunk['content']}\n"
|
|
250
|
+
f"## metadata\n"
|
|
251
|
+
f"Document URI: {next_chunk['documentMetadata']['uri']}\n"
|
|
252
|
+
f"Document Title: {next_chunk['documentMetadata']['title']}\n"
|
|
253
|
+
)
|
|
254
|
+
all_chunks.append(next_chunk_string)
|
|
255
|
+
|
|
256
|
+
# Combine all chunks into one long string
|
|
257
|
+
result_string = "\n".join(all_chunks)
|
|
258
|
+
|
|
259
|
+
return result_string
|
|
260
|
+
|
|
206
261
|
def import_documents(self,
|
|
207
262
|
gcs_uri: Optional[str] = None,
|
|
208
263
|
data_schema="content",
|
|
@@ -61,7 +61,6 @@ def do_llamaindex(message_data, metadata, vector_name):
|
|
|
61
61
|
corpuses = []
|
|
62
62
|
for memory in memories:
|
|
63
63
|
for key, value in memory.items(): # Now iterate over the dictionary
|
|
64
|
-
log.info(f"Found memory {key}")
|
|
65
64
|
vectorstore = value.get('vectorstore')
|
|
66
65
|
if vectorstore == "llamaindex":
|
|
67
66
|
log.info(f"Found vectorstore {vectorstore}")
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: sunholo
|
|
3
|
-
Version: 0.69.8
|
|
3
|
+
Version: 0.69.10
|
|
4
4
|
Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
|
|
5
5
|
Home-page: https://github.com/sunholo-data/sunholo-py
|
|
6
|
-
Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.69.8.tar.gz
|
|
6
|
+
Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.69.10.tar.gz
|
|
7
7
|
Author: Holosun ApS
|
|
8
8
|
Author-email: multivac@sunholo.com
|
|
9
9
|
License: Apache License, Version 2.0
|
|
@@ -61,9 +61,9 @@ sunholo/database/sql/sb/delete_source_row.sql,sha256=r6fEuUKdbiLHCDGKSbKINDCpJjs
|
|
|
61
61
|
sunholo/database/sql/sb/return_sources.sql,sha256=89KAnxfK8n_qGK9jy1OQT8f9n4uYUtYL5cCxbC2mj_c,255
|
|
62
62
|
sunholo/database/sql/sb/setup.sql,sha256=CvoFvZQev2uWjmFa3aj3m3iuPFzAAJZ0S7Qi3L3-zZI,89
|
|
63
63
|
sunholo/discovery_engine/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
64
|
-
sunholo/discovery_engine/chunker_handler.py,sha256=
|
|
65
|
-
sunholo/discovery_engine/create_new.py,sha256=
|
|
66
|
-
sunholo/discovery_engine/discovery_engine_client.py,sha256=
|
|
64
|
+
sunholo/discovery_engine/chunker_handler.py,sha256=9zv72CxgKlKpOcSAt-XFtq0Ra-Z6tBTwMuXQhRPE5mY,4425
|
|
65
|
+
sunholo/discovery_engine/create_new.py,sha256=7oZG78T6lW0EspRzlo7-qRyXFSuFxDn2dfSAVEaqlqY,978
|
|
66
|
+
sunholo/discovery_engine/discovery_engine_client.py,sha256=xgRv1WMYgjjKE5e2SwV3ZeaodzQ1twSoaRwfQkiTOSo,14687
|
|
67
67
|
sunholo/embedder/__init__.py,sha256=sI4N_CqgEVcrMDxXgxKp1FsfsB4FpjoXgPGkl4N_u4I,44
|
|
68
68
|
sunholo/embedder/embed_chunk.py,sha256=P744zUQJgqrjILunzaqtTerB9AwoXFU6tXBtz4rjWgQ,6673
|
|
69
69
|
sunholo/gcs/__init__.py,sha256=DtVw_AZwQn-IguR5BJuIi2XJeF_FQXizhJikzRNrXiE,50
|
|
@@ -76,7 +76,7 @@ sunholo/langfuse/prompts.py,sha256=HO4Zy9usn5tKooBPCKksuw4Lff3c03Ny5wqn4ce_xZM,1
|
|
|
76
76
|
sunholo/llamaindex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
77
77
|
sunholo/llamaindex/generate.py,sha256=l1Picr-hVwkmAUD7XmTCa63qY9ERliFHQXwyX3BqB2Q,686
|
|
78
78
|
sunholo/llamaindex/get_files.py,sha256=6rhXCDqQ_lrIapISQ_OYQDjiSATXvS_9m3qq53-oIl0,781
|
|
79
|
-
sunholo/llamaindex/import_files.py,sha256=
|
|
79
|
+
sunholo/llamaindex/import_files.py,sha256=LIFBXxgXgRYMdVpq3FkycIoMMhLSXkKdKzK7qDuqOmQ,5566
|
|
80
80
|
sunholo/lookup/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
81
81
|
sunholo/lookup/model_lookup.yaml,sha256=O7o-jP53MLA06C8pI-ILwERShO-xf6z_258wtpZBv6A,739
|
|
82
82
|
sunholo/patches/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -111,9 +111,9 @@ sunholo/vertex/__init__.py,sha256=JvHcGFuv6R_nAhY2AdoqqhMpJ5ugeWPZ_svGhWrObBk,13
|
|
|
111
111
|
sunholo/vertex/init.py,sha256=JDMUaBRdednzbKF-5p33qqLit2LMsvgvWW-NRz0AqO0,1801
|
|
112
112
|
sunholo/vertex/memory_tools.py,sha256=8F1iTWnqEK9mX4W5RzCVKIjydIcNp6OFxjn_dtQ3GXo,5379
|
|
113
113
|
sunholo/vertex/safety.py,sha256=3meAX0HyGZYrH7rXPUAHxtI_3w_zoy_RX7Shtkoa660,1275
|
|
114
|
-
sunholo-0.69.
|
|
115
|
-
sunholo-0.69.
|
|
116
|
-
sunholo-0.69.
|
|
117
|
-
sunholo-0.69.
|
|
118
|
-
sunholo-0.69.
|
|
119
|
-
sunholo-0.69.
|
|
114
|
+
sunholo-0.69.10.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
|
|
115
|
+
sunholo-0.69.10.dist-info/METADATA,sha256=t0FhARCqbXEulRWUtzijoEFrssaFlYtG5dBG8xzEpg4,6157
|
|
116
|
+
sunholo-0.69.10.dist-info/WHEEL,sha256=mguMlWGMX-VHnMpKOjjQidIo1ssRlCFu4a4mBpz1s2M,91
|
|
117
|
+
sunholo-0.69.10.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
|
|
118
|
+
sunholo-0.69.10.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
|
|
119
|
+
sunholo-0.69.10.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|