sunholo 0.69.9__py3-none-any.whl → 0.69.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -51,6 +51,8 @@ def do_discovery_engine(message_data, metadata, vector_name):
51
51
 
52
52
  if message_data.startswith("gs://"):
53
53
  log.info(f"DiscoveryEngineClient.import_files for {message_data}")
54
+ if "/pdf_parts/" in message_data:
55
+ return None
54
56
  for corp in corpuses:
55
57
  try:
56
58
  response = corp.import_documents(
@@ -155,6 +155,7 @@ class DiscoveryEngineClient:
155
155
  num_previous_chunks: int = 3,
156
156
  num_next_chunks: int = 3,
157
157
  page_size: int = 10,
158
+ parse_chunks_to_string: bool = True,
158
159
  doc_or_chunks: str = "CHUNKS", # or DOCUMENTS
159
160
  serving_config: str = "default_serving_config",
160
161
  ):
@@ -166,6 +167,7 @@ class DiscoveryEngineClient:
166
167
  num_previous_chunks (int, optional): Number of previous chunks to return for context (default is 3).
167
168
  num_next_chunks (int, optional): Number of next chunks to return for context (default is 3).
168
169
  page_size (int, optional): The maximum number of results to return per page (default is 10).
170
+ parse_chunks_to_string: If True will put chunks in one big string, False will return object
169
171
 
170
172
  Returns:
171
173
  discoveryengine.SearchResponse: The search response object containing the search results.
@@ -199,10 +201,69 @@ class DiscoveryEngineClient:
199
201
  ),
200
202
  )
201
203
 
204
+ log.info(f"Discovery engine request: {search_request=}")
202
205
  search_response = self.search_client.search(search_request)
206
+
207
+
208
+ if parse_chunks_to_string:
209
+
210
+ big_string = self.process_chunks(search_response)
211
+ log.info(f"Discovery engine chunks string sample: {big_string[:100]}")
203
212
 
213
+ return big_string
214
+
215
+ log.info("Discovery engine response object")
204
216
  return search_response
205
217
 
218
def process_chunks(self, response):
    """Flatten the chunks of a search response into one string.

    For every entry in ``response['results']`` the chunk's previous
    chunks, the chunk itself, and its next chunks are rendered (in that
    order) as small text sections and joined with newlines.

    Args:
        response: Mapping-like search response. It is accessed only with
            ``in`` and ``[]`` using the keys ``results``, ``chunk``,
            ``chunkMetadata``, ``previousChunks``, ``nextChunks``, ``id``,
            ``content`` and ``documentMetadata`` (``uri``, ``title``).
            NOTE(review): assumes the response is in this dict-like,
            camelCase form -- confirm against what the search client
            actually returns.

    Returns:
        str: All rendered chunks joined by ``"\n"``; an empty string when
        ``results`` is empty.

    Raises:
        ValueError: If ``'results'`` is not in ``response``.
        KeyError: If a chunk lacks ``id``, ``content`` or
            ``documentMetadata`` fields.
    """
    if 'results' not in response:
        raise ValueError(f'No results found in response: {response=}')

    def format_chunk(chunk):
        # Single rendering template shared by previous, fetched and next chunks.
        return (
            f"# {chunk['id']}\n"
            f"{chunk['content']}\n"
            f"## metadata\n"
            f"Document URI: {chunk['documentMetadata']['uri']}\n"
            f"Document Title: {chunk['documentMetadata']['title']}\n"
        )

    def neighbours(chunk_metadata, key):
        # Neighbouring chunks are optional; treat a missing key as "none".
        return chunk_metadata[key] if key in chunk_metadata else []

    all_chunks = []
    for result in response['results']:
        chunk = result['chunk']
        # A chunk without surrounding context may carry no chunkMetadata at
        # all; fall back to an empty mapping instead of raising KeyError.
        chunk_metadata = chunk['chunkMetadata'] if 'chunkMetadata' in chunk else {}

        all_chunks.extend(
            format_chunk(prev_chunk)
            for prev_chunk in neighbours(chunk_metadata, 'previousChunks')
        )
        all_chunks.append(format_chunk(chunk))
        all_chunks.extend(
            format_chunk(next_chunk)
            for next_chunk in neighbours(chunk_metadata, 'nextChunks')
        )

    # Each rendered chunk already ends in a newline, so joining with "\n"
    # leaves one blank line between consecutive chunks.
    return "\n".join(all_chunks)
266
+
206
267
  def import_documents(self,
207
268
  gcs_uri: Optional[str] = None,
208
269
  data_schema="content",
@@ -256,9 +317,9 @@ class DiscoveryEngineClient:
256
317
  try:
257
318
  operation = import_documents_with_retry(self.doc_client, request)
258
319
  except ResourceExhausted as e:
259
- print(f"Operation failed after retries due to quota exceeded: {e}")
320
+ log.error(f"Operation failed after retries due to quota exceeded: {e}")
260
321
  except Exception as e:
261
- print(f"An unexpected error occurred: {e}")
322
+ log.error(f"An unexpected error occurred: {e}")
262
323
 
263
324
  return operation.operation.name
264
325
 
@@ -61,7 +61,6 @@ def do_llamaindex(message_data, metadata, vector_name):
61
61
  corpuses = []
62
62
  for memory in memories:
63
63
  for key, value in memory.items(): # Now iterate over the dictionary
64
- log.info(f"Found memory {key}")
65
64
  vectorstore = value.get('vectorstore')
66
65
  if vectorstore == "llamaindex":
67
66
  log.info(f"Found vectorstore {vectorstore}")
sunholo/logging.py CHANGED
@@ -119,8 +119,14 @@ class GoogleCloudLogging:
119
119
  if log_text:
120
120
  if isinstance(log_struct, dict):
121
121
  logger.log_struct(log_struct, severity=severity, source_location=caller_info)
122
- else:
122
+ elif isinstance(log_struct, str):
123
123
  logger.log_text(log_text, severity=severity, source_location=caller_info)
124
+ else:
125
+ try:
126
+ turn_to_text = str(log_text)
127
+ logger.log_text(turn_to_text, severity=severity, source_location=caller_info)
128
+ except Exception as err:
129
+ print(f"Could not log this: {log_text=} - {str(err)}")
124
130
 
125
131
  elif log_struct:
126
132
  if not isinstance(log_struct, dict):
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sunholo
3
- Version: 0.69.9
3
+ Version: 0.69.12
4
4
  Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
5
5
  Home-page: https://github.com/sunholo-data/sunholo-py
6
- Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.69.9.tar.gz
6
+ Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.69.12.tar.gz
7
7
  Author: Holosun ApS
8
8
  Author-email: multivac@sunholo.com
9
9
  License: Apache License, Version 2.0
@@ -61,6 +61,7 @@ Requires-Dist: pypdf ; extra == 'all'
61
61
  Requires-Dist: python-socketio ; extra == 'all'
62
62
  Requires-Dist: rich ; extra == 'all'
63
63
  Requires-Dist: supabase ; extra == 'all'
64
+ Requires-Dist: tantivy ; extra == 'all'
64
65
  Requires-Dist: tiktoken ; extra == 'all'
65
66
  Provides-Extra: anthropic
66
67
  Requires-Dist: langchain-anthropic >=0.1.13 ; extra == 'anthropic'
@@ -75,6 +76,7 @@ Requires-Dist: pg8000 ; extra == 'database'
75
76
  Requires-Dist: pgvector ; extra == 'database'
76
77
  Requires-Dist: psycopg2-binary ; extra == 'database'
77
78
  Requires-Dist: lancedb ; extra == 'database'
79
+ Requires-Dist: tantivy ; extra == 'database'
78
80
  Provides-Extra: gcp
79
81
  Requires-Dist: google-auth-httplib2 ; extra == 'gcp'
80
82
  Requires-Dist: google-auth-oauthlib ; extra == 'gcp'
@@ -1,5 +1,5 @@
1
1
  sunholo/__init__.py,sha256=0CdpufyRKWyZe7J7UKigL6j_qOorM-p0OjHIAuf9M38,864
2
- sunholo/logging.py,sha256=00VGGArfWHbJuHHSJ4kXhHTggWnRfbVYMcZNOYIsqnA,11787
2
+ sunholo/logging.py,sha256=UUBl0_oBrW21O5cNAT5lYZ2OmAnVJ92PnAwtA_2Sz_g,12117
3
3
  sunholo/agents/__init__.py,sha256=Hb4NXy2rN-83Z0-UDRwX-LXv2R29lcbSFPf8G6q4fZg,380
4
4
  sunholo/agents/chat_history.py,sha256=8iX1bgvRW6fdp6r_DQR_caPHYrZ_9QJJgPxCiSDf3q8,5380
5
5
  sunholo/agents/dispatch_to_qa.py,sha256=nFNdxhkr7rVYuUwVoBCBNYBI2Dke6-_z_ZApBEWb_cU,8291
@@ -61,9 +61,9 @@ sunholo/database/sql/sb/delete_source_row.sql,sha256=r6fEuUKdbiLHCDGKSbKINDCpJjs
61
61
  sunholo/database/sql/sb/return_sources.sql,sha256=89KAnxfK8n_qGK9jy1OQT8f9n4uYUtYL5cCxbC2mj_c,255
62
62
  sunholo/database/sql/sb/setup.sql,sha256=CvoFvZQev2uWjmFa3aj3m3iuPFzAAJZ0S7Qi3L3-zZI,89
63
63
  sunholo/discovery_engine/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
64
- sunholo/discovery_engine/chunker_handler.py,sha256=9zv72CxgKlKpOcSAt-XFtq0Ra-Z6tBTwMuXQhRPE5mY,4425
64
+ sunholo/discovery_engine/chunker_handler.py,sha256=puNnV6vKnjNqk28FLnsIPMNsC-1Y6s20PK7ioi8qwzc,4491
65
65
  sunholo/discovery_engine/create_new.py,sha256=7oZG78T6lW0EspRzlo7-qRyXFSuFxDn2dfSAVEaqlqY,978
66
- sunholo/discovery_engine/discovery_engine_client.py,sha256=UYMhXtGWXuyyg4fA1piVjqfrWXkSP_MFimh6NzaUlZ8,12413
66
+ sunholo/discovery_engine/discovery_engine_client.py,sha256=WFi0h-JfZd0eKtW6uzm6zfffkGMP2iKBHZU1Tw1uiCw,14944
67
67
  sunholo/embedder/__init__.py,sha256=sI4N_CqgEVcrMDxXgxKp1FsfsB4FpjoXgPGkl4N_u4I,44
68
68
  sunholo/embedder/embed_chunk.py,sha256=P744zUQJgqrjILunzaqtTerB9AwoXFU6tXBtz4rjWgQ,6673
69
69
  sunholo/gcs/__init__.py,sha256=DtVw_AZwQn-IguR5BJuIi2XJeF_FQXizhJikzRNrXiE,50
@@ -76,7 +76,7 @@ sunholo/langfuse/prompts.py,sha256=HO4Zy9usn5tKooBPCKksuw4Lff3c03Ny5wqn4ce_xZM,1
76
76
  sunholo/llamaindex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
77
77
  sunholo/llamaindex/generate.py,sha256=l1Picr-hVwkmAUD7XmTCa63qY9ERliFHQXwyX3BqB2Q,686
78
78
  sunholo/llamaindex/get_files.py,sha256=6rhXCDqQ_lrIapISQ_OYQDjiSATXvS_9m3qq53-oIl0,781
79
- sunholo/llamaindex/import_files.py,sha256=WPaVfYKS08GN0HXodPH_5WCs6d_fqj-N4MQIzvlz5Xw,5610
79
+ sunholo/llamaindex/import_files.py,sha256=LIFBXxgXgRYMdVpq3FkycIoMMhLSXkKdKzK7qDuqOmQ,5566
80
80
  sunholo/lookup/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
81
81
  sunholo/lookup/model_lookup.yaml,sha256=O7o-jP53MLA06C8pI-ILwERShO-xf6z_258wtpZBv6A,739
82
82
  sunholo/patches/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -111,9 +111,9 @@ sunholo/vertex/__init__.py,sha256=JvHcGFuv6R_nAhY2AdoqqhMpJ5ugeWPZ_svGhWrObBk,13
111
111
  sunholo/vertex/init.py,sha256=JDMUaBRdednzbKF-5p33qqLit2LMsvgvWW-NRz0AqO0,1801
112
112
  sunholo/vertex/memory_tools.py,sha256=8F1iTWnqEK9mX4W5RzCVKIjydIcNp6OFxjn_dtQ3GXo,5379
113
113
  sunholo/vertex/safety.py,sha256=3meAX0HyGZYrH7rXPUAHxtI_3w_zoy_RX7Shtkoa660,1275
114
- sunholo-0.69.9.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
115
- sunholo-0.69.9.dist-info/METADATA,sha256=53jro7ekONplbxTNVWi4DTYXHawjUEnEuLNPi704EwI,6155
116
- sunholo-0.69.9.dist-info/WHEEL,sha256=mguMlWGMX-VHnMpKOjjQidIo1ssRlCFu4a4mBpz1s2M,91
117
- sunholo-0.69.9.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
118
- sunholo-0.69.9.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
119
- sunholo-0.69.9.dist-info/RECORD,,
114
+ sunholo-0.69.12.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
115
+ sunholo-0.69.12.dist-info/METADATA,sha256=w1SqSvez7MqPL3PlMn4PIrhlFDru7p8XLU_DPiE8_AI,6242
116
+ sunholo-0.69.12.dist-info/WHEEL,sha256=mguMlWGMX-VHnMpKOjjQidIo1ssRlCFu4a4mBpz1s2M,91
117
+ sunholo-0.69.12.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
118
+ sunholo-0.69.12.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
119
+ sunholo-0.69.12.dist-info/RECORD,,