sunholo 0.134.3__py3-none-any.whl → 0.134.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sunholo/discovery_engine/cli.py +49 -27
- sunholo/discovery_engine/discovery_engine_client.py +103 -10
- {sunholo-0.134.3.dist-info → sunholo-0.134.5.dist-info}/METADATA +1 -1
- {sunholo-0.134.3.dist-info → sunholo-0.134.5.dist-info}/RECORD +8 -8
- {sunholo-0.134.3.dist-info → sunholo-0.134.5.dist-info}/WHEEL +0 -0
- {sunholo-0.134.3.dist-info → sunholo-0.134.5.dist-info}/entry_points.txt +0 -0
- {sunholo-0.134.3.dist-info → sunholo-0.134.5.dist-info}/licenses/LICENSE.txt +0 -0
- {sunholo-0.134.3.dist-info → sunholo-0.134.5.dist-info}/top_level.txt +0 -0
sunholo/discovery_engine/cli.py
CHANGED
@@ -224,34 +224,56 @@ def search_command(args):
|
|
224
224
|
if args.parse_chunks_to_string:
|
225
225
|
console.print("\n[bold magenta]--- Combined Chunk String ---[/bold magenta]")
|
226
226
|
console.print(results_data if results_data else "[yellow]No results found or error occurred.[/yellow]")
|
227
|
+
elif isinstance(results_data, str):
|
228
|
+
# Handle string result when parse_chunks_to_string is False but a string was returned anyway
|
229
|
+
console.print("\n[bold magenta]--- Results String ---[/bold magenta]")
|
230
|
+
console.print(results_data)
|
227
231
|
elif results_data: # It's a pager object
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
232
|
+
if args.content_search_spec_type == "chunks":
|
233
|
+
console.print("\n[bold magenta]--- Individual Chunks ---[/bold magenta]")
|
234
|
+
chunk_count = 0
|
235
|
+
try:
|
236
|
+
# Iterate through the pager returned by get_chunks
|
237
|
+
for page in results_data.pages:
|
238
|
+
if not hasattr(page, 'results') or not page.results: continue
|
239
|
+
for result in page.results:
|
240
|
+
# Ensure the result structure is as expected by get_chunks
|
241
|
+
if hasattr(result, 'chunk'):
|
242
|
+
chunk_count += 1
|
243
|
+
console.print(f"\n[bold]Chunk {chunk_count}:[/bold]")
|
244
|
+
# Use the client's formatter if available
|
245
|
+
console.print(client.chunk_format(result.chunk))
|
246
|
+
elif hasattr(result, 'document') and hasattr(result.document, 'chunks'):
|
247
|
+
# Fallback if structure is different (e.g., document with chunks)
|
248
|
+
for chunk in result.document.chunks:
|
249
|
+
chunk_count += 1
|
250
|
+
console.print(f"\n[bold]Chunk {chunk_count} (from doc {result.document.id}):[/bold]")
|
251
|
+
console.print(f" Content: {getattr(chunk, 'content', 'N/A')}")
|
252
|
+
console.print(f" Doc Name: {getattr(chunk, 'document_metadata', {}).get('name', 'N/A')}")
|
253
|
+
if chunk_count == 0:
|
254
|
+
console.print("[yellow]No chunks found in the results.[/yellow]")
|
255
|
+
|
256
|
+
except Exception as page_err:
|
257
|
+
console.print(f"[bold red]Error processing search results pager: {page_err}[/bold red]")
|
258
|
+
console.print(f"[red]{traceback.format_exc()}[/red]")
|
259
|
+
elif args.content_search_spec_type == "documents":
|
260
|
+
console.print("\n[bold magenta]--- Individual Documents ---[/bold magenta]")
|
261
|
+
doc_count = 0
|
262
|
+
try:
|
263
|
+
# Iterate through the pager returned by get_documents
|
264
|
+
for page in results_data.pages:
|
265
|
+
if not hasattr(page, 'results') or not page.results: continue
|
266
|
+
for result in page.results:
|
267
|
+
if hasattr(result, 'document'):
|
268
|
+
doc_count += 1
|
269
|
+
console.print(f"\n[bold]Document {doc_count}:[/bold]")
|
270
|
+
console.print(client.document_format(result.document))
|
271
|
+
|
272
|
+
if doc_count == 0:
|
273
|
+
console.print("[yellow]No documents found in the results.[/yellow]")
|
274
|
+
except Exception as page_err:
|
275
|
+
console.print(f"[bold red]Error processing document results: {page_err}[/bold red]")
|
276
|
+
console.print(f"[red]{traceback.format_exc()}[/red]")
|
255
277
|
else:
|
256
278
|
console.print("[yellow]No results found or error occurred.[/yellow]")
|
257
279
|
|
@@ -309,8 +309,11 @@ class DiscoveryEngineClient:
|
|
309
309
|
|
310
310
|
# Iterate through each result in the response
|
311
311
|
for result in response.results:
|
312
|
-
|
313
|
-
|
312
|
+
if hasattr(result, 'chunk'):
|
313
|
+
chunk = result.chunk
|
314
|
+
chunk_metadata = chunk.ChunkMetadata
|
315
|
+
else:
|
316
|
+
log.warning("No chunk found in result")
|
314
317
|
|
315
318
|
if hasattr(chunk_metadata, 'previous_chunks'):
|
316
319
|
# Process previous chunks
|
@@ -339,8 +342,11 @@ class DiscoveryEngineClient:
|
|
339
342
|
|
340
343
|
# Iterate through each result in the response
|
341
344
|
for result in response.results:
|
342
|
-
|
343
|
-
|
345
|
+
if hasattr(result, 'chunk'):
|
346
|
+
chunk = result.chunk
|
347
|
+
chunk_metadata = chunk.ChunkMetadata
|
348
|
+
else:
|
349
|
+
log.warning("No chunk found in result")
|
344
350
|
|
345
351
|
if hasattr(chunk_metadata, 'previous_chunks'):
|
346
352
|
# Process previous chunks
|
@@ -431,6 +437,83 @@ class DiscoveryEngineClient:
|
|
431
437
|
data_store_ids=data_store_ids,
|
432
438
|
content_search_spec_type="documents"
|
433
439
|
)
|
440
|
+
|
441
|
+
def document_format(self, document):
|
442
|
+
"""Format a document for string output."""
|
443
|
+
# Extract useful fields from the document
|
444
|
+
document_id = document.id
|
445
|
+
document_name = document.name
|
446
|
+
|
447
|
+
# Get content if available
|
448
|
+
content = ""
|
449
|
+
if hasattr(document, 'content') and document.content:
|
450
|
+
if hasattr(document.content, 'uri') and document.content.uri:
|
451
|
+
content = f"Content URI: {document.content.uri}\n"
|
452
|
+
if hasattr(document.content, 'mime_type') and document.content.mime_type:
|
453
|
+
content += f"Content Type: {document.content.mime_type}\n"
|
454
|
+
|
455
|
+
# Get structured data if available
|
456
|
+
struct_data = ""
|
457
|
+
if hasattr(document, 'struct_data') and document.struct_data:
|
458
|
+
struct_data = f"Structured Data: {dict(document.struct_data)}\n"
|
459
|
+
|
460
|
+
# Get derived structured data if available
|
461
|
+
derived_data = ""
|
462
|
+
if hasattr(document, 'derived_struct_data') and document.derived_struct_data:
|
463
|
+
derived_data = f"Derived Data: {dict(document.derived_struct_data)}\n"
|
464
|
+
|
465
|
+
# Return formatted document string
|
466
|
+
return (
|
467
|
+
f"# Document: {document_id}\n"
|
468
|
+
f"Resource Name: {document_name}\n"
|
469
|
+
f"{content}"
|
470
|
+
f"{struct_data}"
|
471
|
+
f"{derived_data}"
|
472
|
+
)
|
473
|
+
|
474
|
+
def process_documents(self, response):
|
475
|
+
"""Process a search response containing documents into a formatted string."""
|
476
|
+
all_documents = []
|
477
|
+
|
478
|
+
# Check if the response contains results
|
479
|
+
if not hasattr(response, 'results') or not response.results:
|
480
|
+
log.info(f'No results found in response: {response=}')
|
481
|
+
return []
|
482
|
+
|
483
|
+
# Iterate through each result in the response
|
484
|
+
for result in response.results:
|
485
|
+
if hasattr(result, 'document'):
|
486
|
+
document = result.document
|
487
|
+
all_documents.append(self.document_format(document))
|
488
|
+
else:
|
489
|
+
log.warning("No document found in result")
|
490
|
+
|
491
|
+
# Combine all documents into one long string
|
492
|
+
result_string = "\n\n".join(all_documents)
|
493
|
+
|
494
|
+
return result_string
|
495
|
+
|
496
|
+
async def async_process_documents(self, response):
|
497
|
+
"""Process a search response containing documents into a formatted string asynchronously."""
|
498
|
+
all_documents = []
|
499
|
+
|
500
|
+
# Check if the response contains results
|
501
|
+
if not hasattr(response, 'results') or not response.results:
|
502
|
+
log.info(f'No results found in response: {response=}')
|
503
|
+
return []
|
504
|
+
|
505
|
+
# Iterate through each result in the response
|
506
|
+
for result in response.results:
|
507
|
+
if hasattr(result, 'document'):
|
508
|
+
document = result.document
|
509
|
+
all_documents.append(self.document_format(document))
|
510
|
+
else:
|
511
|
+
log.warning("No document found in result")
|
512
|
+
|
513
|
+
# Combine all documents into one long string
|
514
|
+
result_string = "\n\n".join(all_documents)
|
515
|
+
|
516
|
+
return result_string
|
434
517
|
|
435
518
|
def create_engine(self,
|
436
519
|
engine_id: str,
|
@@ -753,9 +836,14 @@ class DiscoveryEngineClient:
|
|
753
836
|
log.info(f"No results {search_request.data_store_specs=}: {str(e)}")
|
754
837
|
return None
|
755
838
|
|
756
|
-
if
|
757
|
-
|
758
|
-
|
839
|
+
if content_search_spec_type=="chunks":
|
840
|
+
if parse_chunks_to_string:
|
841
|
+
big_string = self.process_chunks(search_response)
|
842
|
+
log.info(f"Discovery engine chunks string sample: {big_string[:100]}")
|
843
|
+
return big_string
|
844
|
+
elif content_search_spec_type=="documents":
|
845
|
+
big_string = self.process_documents(search_response)
|
846
|
+
log.info(f"Discovery engine documents string sample: {big_string[:100]}")
|
759
847
|
return big_string
|
760
848
|
|
761
849
|
log.info("Discovery engine response object")
|
@@ -824,9 +912,14 @@ class DiscoveryEngineClient:
|
|
824
912
|
log.info(f"No results {search_request.data_store_specs=}: {str(e)}")
|
825
913
|
return None
|
826
914
|
|
827
|
-
if
|
828
|
-
|
829
|
-
|
915
|
+
if content_search_spec_type=="chunks":
|
916
|
+
if parse_chunks_to_string:
|
917
|
+
big_string = self.process_chunks(search_response)
|
918
|
+
log.info(f"Discovery engine chunks string sample: {big_string[:100]}")
|
919
|
+
return big_string
|
920
|
+
elif content_search_spec_type=="documents":
|
921
|
+
big_string = self.process_documents(search_response)
|
922
|
+
log.info(f"Discovery engine documents string sample: {big_string[:100]}")
|
830
923
|
return big_string
|
831
924
|
|
832
925
|
log.info("Discovery engine response object")
|
@@ -73,9 +73,9 @@ sunholo/database/sql/sb/return_sources.sql,sha256=89KAnxfK8n_qGK9jy1OQT8f9n4uYUt
|
|
73
73
|
sunholo/database/sql/sb/setup.sql,sha256=CvoFvZQev2uWjmFa3aj3m3iuPFzAAJZ0S7Qi3L3-zZI,89
|
74
74
|
sunholo/discovery_engine/__init__.py,sha256=hLgqRDJ22Aov9o2QjAEfsVgnL3kMdM-g5p8RJ9OyKdQ,130
|
75
75
|
sunholo/discovery_engine/chunker_handler.py,sha256=wkvXl4rFtYfN6AZUKdW9_QD49Whf77BukDbO82UwlAg,7480
|
76
|
-
sunholo/discovery_engine/cli.py,sha256=
|
76
|
+
sunholo/discovery_engine/cli.py,sha256=0FKitDCqnKwtONyGt7gmsRoE5W6HHCIDqaTt8S0Dw4s,35631
|
77
77
|
sunholo/discovery_engine/create_new.py,sha256=WUi4_xh_dFaGX3xA9jkNKZhaR6LCELjMPeRb0hyj4FU,1226
|
78
|
-
sunholo/discovery_engine/discovery_engine_client.py,sha256=
|
78
|
+
sunholo/discovery_engine/discovery_engine_client.py,sha256=Ak3VpadtgpPWfIEot87EiNh4vbDUg9gQVa-1UDnoGMA,58442
|
79
79
|
sunholo/discovery_engine/get_ai_search_chunks.py,sha256=I6Dt1CznqEvE7XIZ2PkLqopmjpO96iVEWJJqL5cJjOU,5554
|
80
80
|
sunholo/embedder/__init__.py,sha256=sI4N_CqgEVcrMDxXgxKp1FsfsB4FpjoXgPGkl4N_u4I,44
|
81
81
|
sunholo/embedder/embed_chunk.py,sha256=did2pKkWM2o0KkRcb0H9l2x_WjCq6OyuHDxGbITFKPM,6530
|
@@ -168,9 +168,9 @@ sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
|
|
168
168
|
sunholo/vertex/memory_tools.py,sha256=tBZxqVZ4InTmdBvLlOYwoSEWu4-kGquc-gxDwZCC4FA,7667
|
169
169
|
sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
|
170
170
|
sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
|
171
|
-
sunholo-0.134.
|
172
|
-
sunholo-0.134.
|
173
|
-
sunholo-0.134.
|
174
|
-
sunholo-0.134.
|
175
|
-
sunholo-0.134.
|
176
|
-
sunholo-0.134.
|
171
|
+
sunholo-0.134.5.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
|
172
|
+
sunholo-0.134.5.dist-info/METADATA,sha256=OyzGPXRwE0gTZBioO_oT-pHLXH4s-Fw51ws6pWmT_Jc,10067
|
173
|
+
sunholo-0.134.5.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
|
174
|
+
sunholo-0.134.5.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
|
175
|
+
sunholo-0.134.5.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
|
176
|
+
sunholo-0.134.5.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|