sunholo 0.69.9__py3-none-any.whl → 0.69.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sunholo/discovery_engine/chunker_handler.py +2 -0
- sunholo/discovery_engine/discovery_engine_client.py +63 -2
- sunholo/llamaindex/import_files.py +0 -1
- sunholo/logging.py +7 -1
- {sunholo-0.69.9.dist-info → sunholo-0.69.12.dist-info}/METADATA +4 -2
- {sunholo-0.69.9.dist-info → sunholo-0.69.12.dist-info}/RECORD +10 -10
- {sunholo-0.69.9.dist-info → sunholo-0.69.12.dist-info}/LICENSE.txt +0 -0
- {sunholo-0.69.9.dist-info → sunholo-0.69.12.dist-info}/WHEEL +0 -0
- {sunholo-0.69.9.dist-info → sunholo-0.69.12.dist-info}/entry_points.txt +0 -0
- {sunholo-0.69.9.dist-info → sunholo-0.69.12.dist-info}/top_level.txt +0 -0
|
@@ -51,6 +51,8 @@ def do_discovery_engine(message_data, metadata, vector_name):
|
|
|
51
51
|
|
|
52
52
|
if message_data.startswith("gs://"):
|
|
53
53
|
log.info(f"DiscoveryEngineClient.import_files for {message_data}")
|
|
54
|
+
if "/pdf_parts/" in message_data:
|
|
55
|
+
return None
|
|
54
56
|
for corp in corpuses:
|
|
55
57
|
try:
|
|
56
58
|
response = corp.import_documents(
|
|
@@ -155,6 +155,7 @@ class DiscoveryEngineClient:
|
|
|
155
155
|
num_previous_chunks: int = 3,
|
|
156
156
|
num_next_chunks: int = 3,
|
|
157
157
|
page_size: int = 10,
|
|
158
|
+
parse_chunks_to_string: bool = True,
|
|
158
159
|
doc_or_chunks: str = "CHUNKS", # or DOCUMENTS
|
|
159
160
|
serving_config: str = "default_serving_config",
|
|
160
161
|
):
|
|
@@ -166,6 +167,7 @@ class DiscoveryEngineClient:
|
|
|
166
167
|
num_previous_chunks (int, optional): Number of previous chunks to return for context (default is 3).
|
|
167
168
|
num_next_chunks (int, optional): Number of next chunks to return for context (default is 3).
|
|
168
169
|
page_size (int, optional): The maximum number of results to return per page (default is 10).
|
|
170
|
+
parse_chunks_to_string: If True will put chunks in one big string, False will return object
|
|
169
171
|
|
|
170
172
|
Returns:
|
|
171
173
|
discoveryengine.SearchResponse: The search response object containing the search results.
|
|
@@ -199,10 +201,69 @@ class DiscoveryEngineClient:
|
|
|
199
201
|
),
|
|
200
202
|
)
|
|
201
203
|
|
|
204
|
+
log.info(f"Discovery engine request: {search_request=}")
|
|
202
205
|
search_response = self.search_client.search(search_request)
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
if parse_chunks_to_string:
|
|
209
|
+
|
|
210
|
+
big_string = self.process_chunks(search_response)
|
|
211
|
+
log.info(f"Discovery engine chunks string sample: {big_string[:100]}")
|
|
203
212
|
|
|
213
|
+
return big_string
|
|
214
|
+
|
|
215
|
+
log.info("Discovery engine response object")
|
|
204
216
|
return search_response
|
|
205
217
|
|
|
218
|
+
def process_chunks(self, response):
    """Flatten a chunk search response into one markdown-style string.

    For every entry in ``response['results']`` the chunks are emitted in
    reading order: any ``previousChunks``, then the fetched chunk itself,
    then any ``nextChunks``. Each chunk is rendered as::

        # <id>
        <content>
        ## metadata
        Document URI: <uri>
        Document Title: <title>

    Args:
        response: The search response. It is only accessed with ``in`` and
            ``[...]`` lookups, so it is assumed to be dict-like
            (NOTE(review): confirm against what ``search_client.search``
            actually returns).

    Returns:
        str: All rendered chunks joined by a newline; an empty string when
        ``results`` is present but empty.

    Raises:
        ValueError: If ``response`` has no ``results`` key.
        KeyError: If a chunk lacks ``id``, ``content`` or its
            ``documentMetadata`` ``uri``/``title``.
    """
    if 'results' not in response:
        raise ValueError(f'No results found in response: {response=}')

    def format_chunk(chunk):
        # Single rendering shared by previous, fetched and next chunks so
        # the three sections cannot drift apart.
        document_metadata = chunk['documentMetadata']
        return (
            f"# {chunk['id']}\n"
            f"{chunk['content']}\n"
            f"## metadata\n"
            f"Document URI: {document_metadata['uri']}\n"
            f"Document Title: {document_metadata['title']}\n"
        )

    all_chunks = []

    for result in response['results']:
        chunk = result['chunk']

        # Neighbouring chunks are optional, so the container holding them
        # is treated as optional too instead of raising KeyError.
        chunk_metadata = chunk['chunkMetadata'] if 'chunkMetadata' in chunk else {}

        if 'previousChunks' in chunk_metadata:
            all_chunks.extend(
                format_chunk(prev_chunk)
                for prev_chunk in chunk_metadata['previousChunks']
            )

        all_chunks.append(format_chunk(chunk))

        if 'nextChunks' in chunk_metadata:
            all_chunks.extend(
                format_chunk(next_chunk)
                for next_chunk in chunk_metadata['nextChunks']
            )

    # Every rendered chunk already ends in a newline, so joining with "\n"
    # leaves one blank line between consecutive chunks.
    return "\n".join(all_chunks)
|
|
266
|
+
|
|
206
267
|
def import_documents(self,
|
|
207
268
|
gcs_uri: Optional[str] = None,
|
|
208
269
|
data_schema="content",
|
|
@@ -256,9 +317,9 @@ class DiscoveryEngineClient:
|
|
|
256
317
|
try:
|
|
257
318
|
operation = import_documents_with_retry(self.doc_client, request)
|
|
258
319
|
except ResourceExhausted as e:
|
|
259
|
-
|
|
320
|
+
log.error(f"Operation failed after retries due to quota exceeded: {e}")
|
|
260
321
|
except Exception as e:
|
|
261
|
-
|
|
322
|
+
log.error(f"An unexpected error occurred: {e}")
|
|
262
323
|
|
|
263
324
|
return operation.operation.name
|
|
264
325
|
|
|
@@ -61,7 +61,6 @@ def do_llamaindex(message_data, metadata, vector_name):
|
|
|
61
61
|
corpuses = []
|
|
62
62
|
for memory in memories:
|
|
63
63
|
for key, value in memory.items(): # Now iterate over the dictionary
|
|
64
|
-
log.info(f"Found memory {key}")
|
|
65
64
|
vectorstore = value.get('vectorstore')
|
|
66
65
|
if vectorstore == "llamaindex":
|
|
67
66
|
log.info(f"Found vectorstore {vectorstore}")
|
sunholo/logging.py
CHANGED
|
@@ -119,8 +119,14 @@ class GoogleCloudLogging:
|
|
|
119
119
|
if log_text:
|
|
120
120
|
if isinstance(log_struct, dict):
|
|
121
121
|
logger.log_struct(log_struct, severity=severity, source_location=caller_info)
|
|
122
|
-
|
|
122
|
+
elif isinstance(log_struct, str):
|
|
123
123
|
logger.log_text(log_text, severity=severity, source_location=caller_info)
|
|
124
|
+
else:
|
|
125
|
+
try:
|
|
126
|
+
turn_to_text = str(log_text)
|
|
127
|
+
logger.log_text(turn_to_text, severity=severity, source_location=caller_info)
|
|
128
|
+
except Exception as err:
|
|
129
|
+
print(f"Could not log this: {log_text=} - {str(err)}")
|
|
124
130
|
|
|
125
131
|
elif log_struct:
|
|
126
132
|
if not isinstance(log_struct, dict):
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: sunholo
|
|
3
|
-
Version: 0.69.9
|
|
3
|
+
Version: 0.69.12
|
|
4
4
|
Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
|
|
5
5
|
Home-page: https://github.com/sunholo-data/sunholo-py
|
|
6
|
-
Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.69.9.tar.gz
|
|
6
|
+
Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.69.12.tar.gz
|
|
7
7
|
Author: Holosun ApS
|
|
8
8
|
Author-email: multivac@sunholo.com
|
|
9
9
|
License: Apache License, Version 2.0
|
|
@@ -61,6 +61,7 @@ Requires-Dist: pypdf ; extra == 'all'
|
|
|
61
61
|
Requires-Dist: python-socketio ; extra == 'all'
|
|
62
62
|
Requires-Dist: rich ; extra == 'all'
|
|
63
63
|
Requires-Dist: supabase ; extra == 'all'
|
|
64
|
+
Requires-Dist: tantivy ; extra == 'all'
|
|
64
65
|
Requires-Dist: tiktoken ; extra == 'all'
|
|
65
66
|
Provides-Extra: anthropic
|
|
66
67
|
Requires-Dist: langchain-anthropic >=0.1.13 ; extra == 'anthropic'
|
|
@@ -75,6 +76,7 @@ Requires-Dist: pg8000 ; extra == 'database'
|
|
|
75
76
|
Requires-Dist: pgvector ; extra == 'database'
|
|
76
77
|
Requires-Dist: psycopg2-binary ; extra == 'database'
|
|
77
78
|
Requires-Dist: lancedb ; extra == 'database'
|
|
79
|
+
Requires-Dist: tantivy ; extra == 'database'
|
|
78
80
|
Provides-Extra: gcp
|
|
79
81
|
Requires-Dist: google-auth-httplib2 ; extra == 'gcp'
|
|
80
82
|
Requires-Dist: google-auth-oauthlib ; extra == 'gcp'
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
sunholo/__init__.py,sha256=0CdpufyRKWyZe7J7UKigL6j_qOorM-p0OjHIAuf9M38,864
|
|
2
|
-
sunholo/logging.py,sha256=
|
|
2
|
+
sunholo/logging.py,sha256=UUBl0_oBrW21O5cNAT5lYZ2OmAnVJ92PnAwtA_2Sz_g,12117
|
|
3
3
|
sunholo/agents/__init__.py,sha256=Hb4NXy2rN-83Z0-UDRwX-LXv2R29lcbSFPf8G6q4fZg,380
|
|
4
4
|
sunholo/agents/chat_history.py,sha256=8iX1bgvRW6fdp6r_DQR_caPHYrZ_9QJJgPxCiSDf3q8,5380
|
|
5
5
|
sunholo/agents/dispatch_to_qa.py,sha256=nFNdxhkr7rVYuUwVoBCBNYBI2Dke6-_z_ZApBEWb_cU,8291
|
|
@@ -61,9 +61,9 @@ sunholo/database/sql/sb/delete_source_row.sql,sha256=r6fEuUKdbiLHCDGKSbKINDCpJjs
|
|
|
61
61
|
sunholo/database/sql/sb/return_sources.sql,sha256=89KAnxfK8n_qGK9jy1OQT8f9n4uYUtYL5cCxbC2mj_c,255
|
|
62
62
|
sunholo/database/sql/sb/setup.sql,sha256=CvoFvZQev2uWjmFa3aj3m3iuPFzAAJZ0S7Qi3L3-zZI,89
|
|
63
63
|
sunholo/discovery_engine/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
64
|
-
sunholo/discovery_engine/chunker_handler.py,sha256=
|
|
64
|
+
sunholo/discovery_engine/chunker_handler.py,sha256=puNnV6vKnjNqk28FLnsIPMNsC-1Y6s20PK7ioi8qwzc,4491
|
|
65
65
|
sunholo/discovery_engine/create_new.py,sha256=7oZG78T6lW0EspRzlo7-qRyXFSuFxDn2dfSAVEaqlqY,978
|
|
66
|
-
sunholo/discovery_engine/discovery_engine_client.py,sha256=
|
|
66
|
+
sunholo/discovery_engine/discovery_engine_client.py,sha256=WFi0h-JfZd0eKtW6uzm6zfffkGMP2iKBHZU1Tw1uiCw,14944
|
|
67
67
|
sunholo/embedder/__init__.py,sha256=sI4N_CqgEVcrMDxXgxKp1FsfsB4FpjoXgPGkl4N_u4I,44
|
|
68
68
|
sunholo/embedder/embed_chunk.py,sha256=P744zUQJgqrjILunzaqtTerB9AwoXFU6tXBtz4rjWgQ,6673
|
|
69
69
|
sunholo/gcs/__init__.py,sha256=DtVw_AZwQn-IguR5BJuIi2XJeF_FQXizhJikzRNrXiE,50
|
|
@@ -76,7 +76,7 @@ sunholo/langfuse/prompts.py,sha256=HO4Zy9usn5tKooBPCKksuw4Lff3c03Ny5wqn4ce_xZM,1
|
|
|
76
76
|
sunholo/llamaindex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
77
77
|
sunholo/llamaindex/generate.py,sha256=l1Picr-hVwkmAUD7XmTCa63qY9ERliFHQXwyX3BqB2Q,686
|
|
78
78
|
sunholo/llamaindex/get_files.py,sha256=6rhXCDqQ_lrIapISQ_OYQDjiSATXvS_9m3qq53-oIl0,781
|
|
79
|
-
sunholo/llamaindex/import_files.py,sha256=
|
|
79
|
+
sunholo/llamaindex/import_files.py,sha256=LIFBXxgXgRYMdVpq3FkycIoMMhLSXkKdKzK7qDuqOmQ,5566
|
|
80
80
|
sunholo/lookup/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
81
81
|
sunholo/lookup/model_lookup.yaml,sha256=O7o-jP53MLA06C8pI-ILwERShO-xf6z_258wtpZBv6A,739
|
|
82
82
|
sunholo/patches/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -111,9 +111,9 @@ sunholo/vertex/__init__.py,sha256=JvHcGFuv6R_nAhY2AdoqqhMpJ5ugeWPZ_svGhWrObBk,13
|
|
|
111
111
|
sunholo/vertex/init.py,sha256=JDMUaBRdednzbKF-5p33qqLit2LMsvgvWW-NRz0AqO0,1801
|
|
112
112
|
sunholo/vertex/memory_tools.py,sha256=8F1iTWnqEK9mX4W5RzCVKIjydIcNp6OFxjn_dtQ3GXo,5379
|
|
113
113
|
sunholo/vertex/safety.py,sha256=3meAX0HyGZYrH7rXPUAHxtI_3w_zoy_RX7Shtkoa660,1275
|
|
114
|
-
sunholo-0.69.
|
|
115
|
-
sunholo-0.69.
|
|
116
|
-
sunholo-0.69.
|
|
117
|
-
sunholo-0.69.
|
|
118
|
-
sunholo-0.69.
|
|
119
|
-
sunholo-0.69.
|
|
114
|
+
sunholo-0.69.12.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
|
|
115
|
+
sunholo-0.69.12.dist-info/METADATA,sha256=w1SqSvez7MqPL3PlMn4PIrhlFDru7p8XLU_DPiE8_AI,6242
|
|
116
|
+
sunholo-0.69.12.dist-info/WHEEL,sha256=mguMlWGMX-VHnMpKOjjQidIo1ssRlCFu4a4mBpz1s2M,91
|
|
117
|
+
sunholo-0.69.12.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
|
|
118
|
+
sunholo-0.69.12.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
|
|
119
|
+
sunholo-0.69.12.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|