sunholo 0.134.3__py3-none-any.whl → 0.134.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sunholo/discovery_engine/discovery_engine_client.py +103 -10
- {sunholo-0.134.3.dist-info → sunholo-0.134.4.dist-info}/METADATA +1 -1
- {sunholo-0.134.3.dist-info → sunholo-0.134.4.dist-info}/RECORD +7 -7
- {sunholo-0.134.3.dist-info → sunholo-0.134.4.dist-info}/WHEEL +0 -0
- {sunholo-0.134.3.dist-info → sunholo-0.134.4.dist-info}/entry_points.txt +0 -0
- {sunholo-0.134.3.dist-info → sunholo-0.134.4.dist-info}/licenses/LICENSE.txt +0 -0
- {sunholo-0.134.3.dist-info → sunholo-0.134.4.dist-info}/top_level.txt +0 -0
@@ -309,8 +309,11 @@ class DiscoveryEngineClient:
|
|
309
309
|
|
310
310
|
# Iterate through each result in the response
|
311
311
|
for result in response.results:
|
312
|
-
|
313
|
-
|
312
|
+
if hasattr(result, 'chunk'):
|
313
|
+
chunk = result.chunk
|
314
|
+
chunk_metadata = chunk.ChunkMetadata
|
315
|
+
else:
|
316
|
+
log.warning("No chunk found in result")
|
314
317
|
|
315
318
|
if hasattr(chunk_metadata, 'previous_chunks'):
|
316
319
|
# Process previous chunks
|
@@ -339,8 +342,11 @@ class DiscoveryEngineClient:
|
|
339
342
|
|
340
343
|
# Iterate through each result in the response
|
341
344
|
for result in response.results:
|
342
|
-
|
343
|
-
|
345
|
+
if hasattr(result, 'chunk'):
|
346
|
+
chunk = result.chunk
|
347
|
+
chunk_metadata = chunk.ChunkMetadata
|
348
|
+
else:
|
349
|
+
log.warning("No chunk found in result")
|
344
350
|
|
345
351
|
if hasattr(chunk_metadata, 'previous_chunks'):
|
346
352
|
# Process previous chunks
|
@@ -431,6 +437,83 @@ class DiscoveryEngineClient:
|
|
431
437
|
data_store_ids=data_store_ids,
|
432
438
|
content_search_spec_type="documents"
|
433
439
|
)
|
440
|
+
|
441
|
+
def document_format(self, document):
    """Render a single document as a short block of labelled text lines.

    The output always starts with the document id and resource name.
    Optional lines follow, each only when the corresponding attribute
    exists on the object and is truthy:

    * ``Content URI`` / ``Content Type`` (from ``document.content``)
    * ``Structured Data`` (``document.struct_data`` converted with ``dict``)
    * ``Derived Data`` (``document.derived_struct_data`` converted with ``dict``)

    Args:
        document: Object exposing ``id`` and ``name``; the remaining
            attributes are optional.

    Returns:
        The formatted text; every emitted line ends with a newline.
    """
    # Header lines are unconditional: a missing id/name raises AttributeError.
    pieces = [
        f"# Document: {document.id}\n",
        f"Resource Name: {document.name}\n",
    ]

    # Content details are only reported when a content object is present.
    payload = getattr(document, 'content', None)
    if payload:
        uri = getattr(payload, 'uri', None)
        if uri:
            pieces.append(f"Content URI: {uri}\n")
        mime_type = getattr(payload, 'mime_type', None)
        if mime_type:
            pieces.append(f"Content Type: {mime_type}\n")

    # Both mapping-like attributes are rendered the same way, in this order.
    for label, attribute in (
        ("Structured Data", 'struct_data'),
        ("Derived Data", 'derived_struct_data'),
    ):
        mapping = getattr(document, attribute, None)
        if mapping:
            pieces.append(f"{label}: {dict(mapping)}\n")

    return "".join(pieces)
|
473
|
+
|
474
|
+
def process_documents(self, response):
    """Turn a search response containing documents into one formatted string.

    Each entry in ``response.results`` that has a ``document`` attribute is
    rendered with ``self.document_format``; the rendered blocks are joined
    with a blank line between them.

    Args:
        response: Object expected to expose an iterable ``results``.

    Returns:
        str: The joined document text. An empty string is returned when the
        response has no ``results`` attribute or the results are empty, so
        callers always receive a string (the previous empty-case return
        value was a list, which contradicted the documented contract and
        the string handling done by callers).
    """
    results = getattr(response, 'results', None)
    if not results:
        log.info(f'No results found in response: {response=}')
        # Keep the return type consistent with the success path.
        return ""

    formatted = []
    for result in results:
        if not hasattr(result, 'document'):
            # Skip entries without a document instead of failing the batch.
            log.warning("No document found in result")
            continue
        formatted.append(self.document_format(result.document))

    # Blank line between documents keeps the blocks visually separated.
    return "\n\n".join(formatted)
|
495
|
+
|
496
|
+
async def async_process_documents(self, response):
    """Async counterpart of ``process_documents``.

    The formatting work involves no awaiting, so this coroutine simply
    delegates to the synchronous ``self.process_documents`` rather than
    carrying a second copy of the same loop. This keeps the two entry
    points from drifting apart.

    Args:
        response: Search response object, passed through unchanged.

    Returns:
        Whatever ``self.process_documents(response)`` returns.
    """
    return self.process_documents(response)
|
434
517
|
|
435
518
|
def create_engine(self,
|
436
519
|
engine_id: str,
|
@@ -753,9 +836,14 @@ class DiscoveryEngineClient:
|
|
753
836
|
log.info(f"No results {search_request.data_store_specs=}: {str(e)}")
|
754
837
|
return None
|
755
838
|
|
756
|
-
if
|
757
|
-
|
758
|
-
|
839
|
+
if content_search_spec_type=="chunks":
|
840
|
+
if parse_chunks_to_string:
|
841
|
+
big_string = self.process_chunks(search_response)
|
842
|
+
log.info(f"Discovery engine chunks string sample: {big_string[:100]}")
|
843
|
+
return big_string
|
844
|
+
elif content_search_spec_type=="documents":
|
845
|
+
big_string = self.process_documents(search_response)
|
846
|
+
log.info(f"Discovery engine documents string sample: {big_string[:100]}")
|
759
847
|
return big_string
|
760
848
|
|
761
849
|
log.info("Discovery engine response object")
|
@@ -824,9 +912,14 @@ class DiscoveryEngineClient:
|
|
824
912
|
log.info(f"No results {search_request.data_store_specs=}: {str(e)}")
|
825
913
|
return None
|
826
914
|
|
827
|
-
if
|
828
|
-
|
829
|
-
|
915
|
+
if content_search_spec_type=="chunks":
|
916
|
+
if parse_chunks_to_string:
|
917
|
+
big_string = self.process_chunks(search_response)
|
918
|
+
log.info(f"Discovery engine chunks string sample: {big_string[:100]}")
|
919
|
+
return big_string
|
920
|
+
elif content_search_spec_type=="documents":
|
921
|
+
big_string = self.process_documents(search_response)
|
922
|
+
log.info(f"Discovery engine documents string sample: {big_string[:100]}")
|
830
923
|
return big_string
|
831
924
|
|
832
925
|
log.info("Discovery engine response object")
|
@@ -75,7 +75,7 @@ sunholo/discovery_engine/__init__.py,sha256=hLgqRDJ22Aov9o2QjAEfsVgnL3kMdM-g5p8R
|
|
75
75
|
sunholo/discovery_engine/chunker_handler.py,sha256=wkvXl4rFtYfN6AZUKdW9_QD49Whf77BukDbO82UwlAg,7480
|
76
76
|
sunholo/discovery_engine/cli.py,sha256=bUhCPoKrkMtdeTBHLyCZf9syVwHn5kE0yXpqDBIzmTc,34120
|
77
77
|
sunholo/discovery_engine/create_new.py,sha256=WUi4_xh_dFaGX3xA9jkNKZhaR6LCELjMPeRb0hyj4FU,1226
|
78
|
-
sunholo/discovery_engine/discovery_engine_client.py,sha256=
|
78
|
+
sunholo/discovery_engine/discovery_engine_client.py,sha256=Ak3VpadtgpPWfIEot87EiNh4vbDUg9gQVa-1UDnoGMA,58442
|
79
79
|
sunholo/discovery_engine/get_ai_search_chunks.py,sha256=I6Dt1CznqEvE7XIZ2PkLqopmjpO96iVEWJJqL5cJjOU,5554
|
80
80
|
sunholo/embedder/__init__.py,sha256=sI4N_CqgEVcrMDxXgxKp1FsfsB4FpjoXgPGkl4N_u4I,44
|
81
81
|
sunholo/embedder/embed_chunk.py,sha256=did2pKkWM2o0KkRcb0H9l2x_WjCq6OyuHDxGbITFKPM,6530
|
@@ -168,9 +168,9 @@ sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
|
|
168
168
|
sunholo/vertex/memory_tools.py,sha256=tBZxqVZ4InTmdBvLlOYwoSEWu4-kGquc-gxDwZCC4FA,7667
|
169
169
|
sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
|
170
170
|
sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
|
171
|
-
sunholo-0.134.
|
172
|
-
sunholo-0.134.
|
173
|
-
sunholo-0.134.
|
174
|
-
sunholo-0.134.
|
175
|
-
sunholo-0.134.
|
176
|
-
sunholo-0.134.
|
171
|
+
sunholo-0.134.4.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
|
172
|
+
sunholo-0.134.4.dist-info/METADATA,sha256=g2a99cYiyPfDeVa55srGe2k_5_MHsJhzwBll43hQ56M,10067
|
173
|
+
sunholo-0.134.4.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
|
174
|
+
sunholo-0.134.4.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
|
175
|
+
sunholo-0.134.4.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
|
176
|
+
sunholo-0.134.4.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|