sunholo 0.69.2__py3-none-any.whl → 0.69.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sunholo/chunker/data_to_embed_pubsub.py +1 -1
- sunholo/discovery_engine/create_new.py +8 -1
- sunholo/discovery_engine/discovery_engine_client.py +35 -26
- {sunholo-0.69.2.dist-info → sunholo-0.69.4.dist-info}/METADATA +2 -2
- {sunholo-0.69.2.dist-info → sunholo-0.69.4.dist-info}/RECORD +9 -9
- {sunholo-0.69.2.dist-info → sunholo-0.69.4.dist-info}/LICENSE.txt +0 -0
- {sunholo-0.69.2.dist-info → sunholo-0.69.4.dist-info}/WHEEL +0 -0
- {sunholo-0.69.2.dist-info → sunholo-0.69.4.dist-info}/entry_points.txt +0 -0
- {sunholo-0.69.2.dist-info → sunholo-0.69.4.dist-info}/top_level.txt +0 -0
|
@@ -60,7 +60,7 @@ def process_chunker_data(message_data, metadata, vector_name):
|
|
|
60
60
|
metadata["vector_name"] = vector_name
|
|
61
61
|
|
|
62
62
|
if message_data is None:
|
|
63
|
-
log.error("No message_data was found in data: {
|
|
63
|
+
log.error(f"No message_data was found in data: {message_data}")
|
|
64
64
|
return
|
|
65
65
|
|
|
66
66
|
log.debug(f"Found metadata in pubsub: {metadata}")
|
|
@@ -6,6 +6,13 @@ def create_new_discovery_engine(vector_name):
|
|
|
6
6
|
|
|
7
7
|
chunker_config = load_config_key("chunker", vector_name=vector_name, kind="vacConfig")
|
|
8
8
|
|
|
9
|
+
if chunker_config:
|
|
10
|
+
chunk_size = chunker_config.get("chunk_size")
|
|
11
|
+
|
|
12
|
+
if not chunk_size:
|
|
13
|
+
chunk_size = 500
|
|
14
|
+
|
|
15
|
+
|
|
9
16
|
project_id = gcp_config.get('project_id')
|
|
10
17
|
#location = gcp_config.get('location')
|
|
11
18
|
|
|
@@ -16,6 +23,6 @@ def create_new_discovery_engine(vector_name):
|
|
|
16
23
|
#location=location
|
|
17
24
|
)
|
|
18
25
|
|
|
19
|
-
new_store = de.create_data_store(chunk_size=
|
|
26
|
+
new_store = de.create_data_store(chunk_size=chunk_size)
|
|
20
27
|
|
|
21
28
|
return new_store
|
|
@@ -61,16 +61,13 @@ class DiscoveryEngineClient:
|
|
|
61
61
|
if location != "global"
|
|
62
62
|
else None
|
|
63
63
|
)
|
|
64
|
-
self.
|
|
65
|
-
self.
|
|
66
|
-
|
|
67
|
-
location=location,
|
|
68
|
-
data_store=data_store_id,
|
|
69
|
-
branch="default_branch",
|
|
70
|
-
)
|
|
64
|
+
self.store_client = discoveryengine.DataStoreServiceClient(client_options=client_options)
|
|
65
|
+
self.doc_client = discoveryengine.DocumentServiceClient(client_options=client_options)
|
|
66
|
+
self.search_client = discoveryengine.SearchServiceClient(client_options=client_options)
|
|
71
67
|
|
|
72
68
|
def create_data_store(
|
|
73
|
-
self, chunk_size: int = 500
|
|
69
|
+
self, chunk_size: int = 500,
|
|
70
|
+
collection: str = "default_collection"
|
|
74
71
|
) -> str:
|
|
75
72
|
"""
|
|
76
73
|
Creates a new data store with default configuration.
|
|
@@ -109,9 +106,15 @@ class DiscoveryEngineClient:
|
|
|
109
106
|
document_processing_config=doc_config
|
|
110
107
|
)
|
|
111
108
|
|
|
109
|
+
parent = self.store_client.collection_path(
|
|
110
|
+
project=self.project_id,
|
|
111
|
+
location=self.location,
|
|
112
|
+
collection=collection,
|
|
113
|
+
)
|
|
114
|
+
|
|
112
115
|
# https://cloud.google.com/python/docs/reference/discoveryengine/0.11.4/google.cloud.discoveryengine_v1alpha.types.CreateDataStoreRequest
|
|
113
116
|
request = discoveryengine.CreateDataStoreRequest(
|
|
114
|
-
parent=
|
|
117
|
+
parent=parent,
|
|
115
118
|
data_store_id=self.data_store_id,
|
|
116
119
|
data_store=data_store,
|
|
117
120
|
# Optional: For Advanced Site Search Only
|
|
@@ -119,7 +122,7 @@ class DiscoveryEngineClient:
|
|
|
119
122
|
)
|
|
120
123
|
|
|
121
124
|
# Make the request
|
|
122
|
-
operation = self.
|
|
125
|
+
operation = self.store_client.create_data_store(request=request)
|
|
123
126
|
|
|
124
127
|
log.info(f"Waiting for operation to complete: {operation.operation.name}")
|
|
125
128
|
response = operation.result()
|
|
@@ -140,6 +143,7 @@ class DiscoveryEngineClient:
|
|
|
140
143
|
num_next_chunks: int = 3,
|
|
141
144
|
page_size: int = 10,
|
|
142
145
|
doc_or_chunks: str = "CHUNKS", # or DOCUMENTS
|
|
146
|
+
serving_config: str = "default_serving_config",
|
|
143
147
|
):
|
|
144
148
|
"""Retrieves chunks or documents based on a query.
|
|
145
149
|
|
|
@@ -161,22 +165,19 @@ class DiscoveryEngineClient:
|
|
|
161
165
|
print(f"Chunk: {chunk.snippet}, document name: {chunk.document_name}")
|
|
162
166
|
```
|
|
163
167
|
"""
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
168
|
+
|
|
169
|
+
serving_config_path = self.search_client.serving_config_path(
|
|
170
|
+
self.project_id,
|
|
171
|
+
self.location,
|
|
172
|
+
self.data_store_id,
|
|
173
|
+
serving_config
|
|
174
|
+
)
|
|
175
|
+
|
|
172
176
|
search_request = discoveryengine.SearchRequest(
|
|
173
|
-
serving_config=
|
|
177
|
+
serving_config=serving_config_path,
|
|
174
178
|
query=query,
|
|
175
179
|
page_size=page_size,
|
|
176
180
|
content_search_spec=discoveryengine.SearchRequest.ContentSearchSpec(
|
|
177
|
-
#snippet_spec=discoveryengine.SearchRequest.ContentSearchSpec.SnippetSpec(
|
|
178
|
-
# return_snippet=True
|
|
179
|
-
#),
|
|
180
181
|
search_result_mode=doc_or_chunks, # CHUNKS or DOCUMENTS
|
|
181
182
|
chunk_spec=discoveryengine.SearchRequest.ContentSearchSpec.ChunkSpec(
|
|
182
183
|
num_previous_chunks=num_previous_chunks,
|
|
@@ -185,13 +186,14 @@ class DiscoveryEngineClient:
|
|
|
185
186
|
),
|
|
186
187
|
)
|
|
187
188
|
|
|
188
|
-
search_response = self.
|
|
189
|
+
search_response = self.search_client.search(search_request)
|
|
189
190
|
|
|
190
191
|
return search_response
|
|
191
192
|
|
|
192
193
|
def import_documents(self,
|
|
193
194
|
gcs_uri: Optional[str] = None,
|
|
194
195
|
data_schema="content",
|
|
196
|
+
branch="default_branch",
|
|
195
197
|
bigquery_dataset: Optional[str] = None,
|
|
196
198
|
bigquery_table: Optional[str] = None,
|
|
197
199
|
bigquery_project_id: Optional[str] = None,
|
|
@@ -203,9 +205,16 @@ class DiscoveryEngineClient:
|
|
|
203
205
|
|
|
204
206
|
"""
|
|
205
207
|
|
|
208
|
+
parent = self.doc_client.branch_path(
|
|
209
|
+
self.project_id,
|
|
210
|
+
self.location,
|
|
211
|
+
self.data_store_id,
|
|
212
|
+
branch
|
|
213
|
+
)
|
|
214
|
+
|
|
206
215
|
if gcs_uri:
|
|
207
216
|
request = discoveryengine.ImportDocumentsRequest(
|
|
208
|
-
parent=
|
|
217
|
+
parent=parent,
|
|
209
218
|
# https://cloud.google.com/python/docs/reference/discoveryengine/latest/google.cloud.discoveryengine_v1alpha.types.GcsSource
|
|
210
219
|
gcs_source=discoveryengine.GcsSource(
|
|
211
220
|
input_uris=[gcs_uri], data_schema=data_schema,
|
|
@@ -215,7 +224,7 @@ class DiscoveryEngineClient:
|
|
|
215
224
|
)
|
|
216
225
|
else:
|
|
217
226
|
request = discoveryengine.ImportDocumentsRequest(
|
|
218
|
-
parent=
|
|
227
|
+
parent=parent,
|
|
219
228
|
bigquery_source=discoveryengine.BigQuerySource(
|
|
220
229
|
project_id=bigquery_project_id or self.project_id,
|
|
221
230
|
dataset_id=bigquery_dataset,
|
|
@@ -227,7 +236,7 @@ class DiscoveryEngineClient:
|
|
|
227
236
|
)
|
|
228
237
|
|
|
229
238
|
# Make the request
|
|
230
|
-
operation = self.
|
|
239
|
+
operation = self.doc_client.import_documents(request=request)
|
|
231
240
|
|
|
232
241
|
log.info(f"Waiting for operation to complete: {operation.operation.name}")
|
|
233
242
|
response = operation.result()
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: sunholo
|
|
3
|
-
Version: 0.69.2
|
|
3
|
+
Version: 0.69.4
|
|
4
4
|
Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
|
|
5
5
|
Home-page: https://github.com/sunholo-data/sunholo-py
|
|
6
|
-
Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.69.2.tar.gz
|
|
6
|
+
Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.69.4.tar.gz
|
|
7
7
|
Author: Holosun ApS
|
|
8
8
|
Author-email: multivac@sunholo.com
|
|
9
9
|
License: Apache License, Version 2.0
|
|
@@ -24,7 +24,7 @@ sunholo/bots/discord.py,sha256=cCFae5K1BCa6JVkWGLh_iZ9qFO1JpXb6K4eJrlDfEro,2442
|
|
|
24
24
|
sunholo/bots/github_webhook.py,sha256=5pQPRLM_wxxcILVaIzUDV8Kt7Arcm2dL1r1kMMHA524,9629
|
|
25
25
|
sunholo/bots/webapp.py,sha256=EIMxdAJ_xtufwJmvnn7N_Fb_1hZ9DjhJ0Kf_hp02vEU,1926
|
|
26
26
|
sunholo/chunker/__init__.py,sha256=yWYwpejyYxDpZv1joTrFMsh2SWAkd0z7a1VKtmOfMhA,77
|
|
27
|
-
sunholo/chunker/data_to_embed_pubsub.py,sha256=
|
|
27
|
+
sunholo/chunker/data_to_embed_pubsub.py,sha256=tL9J7s-F4szmpbZYU4dpvLr5R-LgiuZLpRvhQnrTD1E,4203
|
|
28
28
|
sunholo/chunker/doc_handling.py,sha256=rIyknpzDyj5A0u_DqSQVD_CXLRNZPOU6TCL4bhCdjOI,8563
|
|
29
29
|
sunholo/chunker/images.py,sha256=Xmh1vwHrVhoXm5iH2dhCc52O8YgdzE8KrDSdL-pGnp8,1861
|
|
30
30
|
sunholo/chunker/loaders.py,sha256=xiToUVgPz2ZzcqpUAq7aNP3PTenb_rBUAFzu0JPycIg,10268
|
|
@@ -62,8 +62,8 @@ sunholo/database/sql/sb/return_sources.sql,sha256=89KAnxfK8n_qGK9jy1OQT8f9n4uYUt
|
|
|
62
62
|
sunholo/database/sql/sb/setup.sql,sha256=CvoFvZQev2uWjmFa3aj3m3iuPFzAAJZ0S7Qi3L3-zZI,89
|
|
63
63
|
sunholo/discovery_engine/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
64
64
|
sunholo/discovery_engine/chunker_handler.py,sha256=4NT4fRAWY-kKM4X3imZ2RISYLMvpIVsPDpsAgUOhdlA,4149
|
|
65
|
-
sunholo/discovery_engine/create_new.py,sha256=
|
|
66
|
-
sunholo/discovery_engine/discovery_engine_client.py,sha256=
|
|
65
|
+
sunholo/discovery_engine/create_new.py,sha256=6sowAvblLDBZq6skYwdc6n99YlyHvNLaIs6kYMnkdO8,921
|
|
66
|
+
sunholo/discovery_engine/discovery_engine_client.py,sha256=yjMu_MXYzXzQMSxp_PATDcM7PXlB8VTgJT56fRbD0xs,11693
|
|
67
67
|
sunholo/embedder/__init__.py,sha256=sI4N_CqgEVcrMDxXgxKp1FsfsB4FpjoXgPGkl4N_u4I,44
|
|
68
68
|
sunholo/embedder/embed_chunk.py,sha256=P744zUQJgqrjILunzaqtTerB9AwoXFU6tXBtz4rjWgQ,6673
|
|
69
69
|
sunholo/gcs/__init__.py,sha256=DtVw_AZwQn-IguR5BJuIi2XJeF_FQXizhJikzRNrXiE,50
|
|
@@ -111,9 +111,9 @@ sunholo/vertex/__init__.py,sha256=JvHcGFuv6R_nAhY2AdoqqhMpJ5ugeWPZ_svGhWrObBk,13
|
|
|
111
111
|
sunholo/vertex/init.py,sha256=JDMUaBRdednzbKF-5p33qqLit2LMsvgvWW-NRz0AqO0,1801
|
|
112
112
|
sunholo/vertex/memory_tools.py,sha256=8F1iTWnqEK9mX4W5RzCVKIjydIcNp6OFxjn_dtQ3GXo,5379
|
|
113
113
|
sunholo/vertex/safety.py,sha256=3meAX0HyGZYrH7rXPUAHxtI_3w_zoy_RX7Shtkoa660,1275
|
|
114
|
-
sunholo-0.69.
|
|
115
|
-
sunholo-0.69.
|
|
116
|
-
sunholo-0.69.
|
|
117
|
-
sunholo-0.69.
|
|
118
|
-
sunholo-0.69.
|
|
119
|
-
sunholo-0.69.
|
|
114
|
+
sunholo-0.69.4.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
|
|
115
|
+
sunholo-0.69.4.dist-info/METADATA,sha256=gaLbxVufOz5-8Juy1fMBHvw_BXCv5MiUPoSMI1hJotQ,6155
|
|
116
|
+
sunholo-0.69.4.dist-info/WHEEL,sha256=mguMlWGMX-VHnMpKOjjQidIo1ssRlCFu4a4mBpz1s2M,91
|
|
117
|
+
sunholo-0.69.4.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
|
|
118
|
+
sunholo-0.69.4.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
|
|
119
|
+
sunholo-0.69.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|