sunholo 0.135.0__py3-none-any.whl → 0.136.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sunholo/discovery_engine/discovery_engine_client.py +129 -28
- {sunholo-0.135.0.dist-info → sunholo-0.136.1.dist-info}/METADATA +1 -1
- {sunholo-0.135.0.dist-info → sunholo-0.136.1.dist-info}/RECORD +7 -7
- {sunholo-0.135.0.dist-info → sunholo-0.136.1.dist-info}/WHEEL +1 -1
- {sunholo-0.135.0.dist-info → sunholo-0.136.1.dist-info}/entry_points.txt +0 -0
- {sunholo-0.135.0.dist-info → sunholo-0.136.1.dist-info}/licenses/LICENSE.txt +0 -0
- {sunholo-0.135.0.dist-info → sunholo-0.136.1.dist-info}/top_level.txt +0 -0
@@ -374,7 +374,8 @@ class DiscoveryEngineClient:
|
|
374
374
|
parse_documents_to_string: bool = True,
|
375
375
|
serving_config: str = "default_serving_config",
|
376
376
|
data_store_ids: Optional[List[str]] = None,
|
377
|
-
filter_str:str=None
|
377
|
+
filter_str:str=None,
|
378
|
+
max_limit:int=None
|
378
379
|
):
|
379
380
|
"""Retrieves entire documents based on a query.
|
380
381
|
|
@@ -405,7 +406,8 @@ class DiscoveryEngineClient:
|
|
405
406
|
parse_chunks_to_string=parse_documents_to_string,
|
406
407
|
serving_config=serving_config,
|
407
408
|
data_store_ids=data_store_ids,
|
408
|
-
content_search_spec_type="documents"
|
409
|
+
content_search_spec_type="documents",
|
410
|
+
max_limit=max_limit
|
409
411
|
)
|
410
412
|
|
411
413
|
async def async_get_documents(
|
@@ -415,7 +417,9 @@ class DiscoveryEngineClient:
|
|
415
417
|
parse_documents_to_string: bool = True,
|
416
418
|
serving_config: str = "default_serving_config",
|
417
419
|
data_store_ids: Optional[List[str]] = None,
|
418
|
-
filter_str:str=None
|
420
|
+
filter_str:str=None,
|
421
|
+
max_limit:int=None
|
422
|
+
|
419
423
|
):
|
420
424
|
"""Asynchronously retrieves entire documents based on a query.
|
421
425
|
|
@@ -439,7 +443,9 @@ class DiscoveryEngineClient:
|
|
439
443
|
parse_chunks_to_string=parse_documents_to_string,
|
440
444
|
serving_config=serving_config,
|
441
445
|
data_store_ids=data_store_ids,
|
442
|
-
content_search_spec_type="documents"
|
446
|
+
content_search_spec_type="documents",
|
447
|
+
max_limit=max_limit
|
448
|
+
|
443
449
|
)
|
444
450
|
|
445
451
|
def document_format(self, document):
|
@@ -476,44 +482,79 @@ class DiscoveryEngineClient:
|
|
476
482
|
f"{derived_data}"
|
477
483
|
)
|
478
484
|
|
479
|
-
def process_documents(self, response):
|
485
|
+
def process_documents(self, response, max_limit:int=None):
|
480
486
|
"""Process a search response containing documents into a formatted string."""
|
481
487
|
all_documents = []
|
482
|
-
|
488
|
+
result_count = 0
|
483
489
|
# Check if the response contains results
|
484
|
-
if not hasattr(response, 'results') or not response.results:
|
490
|
+
if not response or not hasattr(response, 'results') or not response.results:
|
485
491
|
log.info(f'No results found in response: {response=}')
|
486
492
|
return []
|
487
493
|
|
488
|
-
|
489
|
-
|
490
|
-
|
491
|
-
|
492
|
-
|
493
|
-
|
494
|
-
|
494
|
+
should_break=False
|
495
|
+
# Process the pager properly
|
496
|
+
for page in response.pages:
|
497
|
+
if should_break:
|
498
|
+
break
|
499
|
+
if hasattr(page, 'results') and page.results:
|
500
|
+
for result in page.results:
|
501
|
+
if result_count >= max_limit:
|
502
|
+
log.info("Breaking results loop as max limit reached")
|
503
|
+
should_break = True # Set flag to break outer loop
|
504
|
+
break
|
505
|
+
|
506
|
+
if hasattr(result, 'document'):
|
507
|
+
document = result.document
|
508
|
+
all_documents.append(self.document_format(document))
|
509
|
+
result_count += 1
|
510
|
+
|
511
|
+
# Check if we've reached max_limit
|
512
|
+
if max_limit is not None and result_count >= max_limit:
|
513
|
+
log.info(f"Reached max_limit of {max_limit} results, stopping processing")
|
514
|
+
should_break = True
|
515
|
+
break
|
516
|
+
else:
|
517
|
+
log.warning("No document found in result")
|
495
518
|
|
496
519
|
# Combine all documents into one long string
|
497
520
|
result_string = "\n\n".join(all_documents)
|
498
521
|
|
499
522
|
return result_string
|
500
523
|
|
501
|
-
async def async_process_documents(self, response):
|
524
|
+
async def async_process_documents(self, response, max_limit:int=None):
|
502
525
|
"""Process a search response containing documents into a formatted string asynchronously."""
|
503
526
|
all_documents = []
|
527
|
+
result_count = 0
|
504
528
|
|
505
529
|
# Check if the response contains results
|
506
|
-
if not hasattr(response, 'results') or not response.results:
|
530
|
+
if not response or not hasattr(response, 'results') or not response.results:
|
507
531
|
log.info(f'No results found in response: {response=}')
|
508
532
|
return []
|
509
|
-
|
510
|
-
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
533
|
+
|
534
|
+
should_break=False
|
535
|
+
# Process the pager properly
|
536
|
+
async for page in response.pages:
|
537
|
+
if should_break:
|
538
|
+
break
|
539
|
+
if hasattr(page, 'results') and page.results:
|
540
|
+
for result in page.results:
|
541
|
+
if result_count >= max_limit:
|
542
|
+
log.info("Breaking results loop as max limit reached")
|
543
|
+
should_break = True # Set flag to break outer loop
|
544
|
+
break
|
545
|
+
|
546
|
+
if hasattr(result, 'document'):
|
547
|
+
document = result.document
|
548
|
+
all_documents.append(self.document_format(document))
|
549
|
+
result_count += 1
|
550
|
+
|
551
|
+
# Check if we've reached max_limit
|
552
|
+
if max_limit is not None and result_count >= max_limit:
|
553
|
+
log.info(f"Reached max_limit of {max_limit} results, stopping processing")
|
554
|
+
should_break = True
|
555
|
+
break
|
556
|
+
else:
|
557
|
+
log.warning("No document found in result")
|
517
558
|
|
518
559
|
# Combine all documents into one long string
|
519
560
|
result_string = "\n\n".join(all_documents)
|
@@ -781,7 +822,8 @@ class DiscoveryEngineClient:
|
|
781
822
|
page_size=10, parse_chunks_to_string=True,
|
782
823
|
serving_config="default_serving_config",
|
783
824
|
data_store_ids: Optional[List[str]] = None,
|
784
|
-
content_search_spec_type="chunks"
|
825
|
+
content_search_spec_type="chunks",
|
826
|
+
max_limit=None):
|
785
827
|
"""
|
786
828
|
Searches with a generic filter string.
|
787
829
|
|
@@ -793,6 +835,8 @@ class DiscoveryEngineClient:
|
|
793
835
|
Returns:
|
794
836
|
discoveryengine.SearchResponse or str: The search response object or string of chunks.
|
795
837
|
"""
|
838
|
+
if max_limit is not None and max_limit < page_size:
|
839
|
+
page_size = max_limit
|
796
840
|
|
797
841
|
serving_config_path = self.search_client.serving_config_path(
|
798
842
|
self.project_id,
|
@@ -840,6 +884,33 @@ class DiscoveryEngineClient:
|
|
840
884
|
except Exception as e:
|
841
885
|
log.info(f"No results {search_request.data_store_specs=}: {str(e)}")
|
842
886
|
return None
|
887
|
+
|
888
|
+
# Apply max_limit if needed
|
889
|
+
if content_search_spec_type=="documents" and max_limit is not None:
|
890
|
+
# For raw response objects (when parse_chunks_to_string=False)
|
891
|
+
if not parse_chunks_to_string:
|
892
|
+
# We need to limit the pager results before returning
|
893
|
+
limited_response = search_response
|
894
|
+
# Store the original pages iterator method
|
895
|
+
original_pages = limited_response.pages
|
896
|
+
|
897
|
+
# Override the pages property with a custom iterator that respects max_limit
|
898
|
+
def limited_pages_iterator():
|
899
|
+
results_count = 0
|
900
|
+
for page in original_pages:
|
901
|
+
yield page
|
902
|
+
|
903
|
+
# Count results in this page
|
904
|
+
if hasattr(page, 'results'):
|
905
|
+
results_count += len(page.results)
|
906
|
+
|
907
|
+
# Stop if we've reached max_limit
|
908
|
+
if results_count >= max_limit:
|
909
|
+
break
|
910
|
+
|
911
|
+
# Replace the pages property with our custom iterator
|
912
|
+
limited_response.pages = limited_pages_iterator()
|
913
|
+
return limited_response
|
843
914
|
|
844
915
|
if parse_chunks_to_string:
|
845
916
|
if content_search_spec_type=="chunks":
|
@@ -850,7 +921,7 @@ class DiscoveryEngineClient:
|
|
850
921
|
return big_string
|
851
922
|
|
852
923
|
elif content_search_spec_type=="documents":
|
853
|
-
big_string = self.process_documents(search_response)
|
924
|
+
big_string = self.process_documents(search_response, max_limit=max_limit)
|
854
925
|
log.info(f"Discovery engine documents string sample: {big_string[:100]}")
|
855
926
|
|
856
927
|
return big_string
|
@@ -864,7 +935,8 @@ class DiscoveryEngineClient:
|
|
864
935
|
page_size=10, parse_chunks_to_string=True,
|
865
936
|
serving_config="default_serving_config",
|
866
937
|
data_store_ids: Optional[List[str]] = None,
|
867
|
-
content_search_spec_type="chunks"
|
938
|
+
content_search_spec_type="chunks",
|
939
|
+
max_limit=None):
|
868
940
|
"""
|
869
941
|
Searches with a generic filter string asynchronously.
|
870
942
|
|
@@ -876,6 +948,8 @@ class DiscoveryEngineClient:
|
|
876
948
|
Returns:
|
877
949
|
discoveryengine.SearchResponse or str: The search response object or string of chunks.
|
878
950
|
"""
|
951
|
+
if max_limit is not None and max_limit < page_size:
|
952
|
+
page_size = max_limit
|
879
953
|
|
880
954
|
serving_config_path = self.async_search_client.serving_config_path(
|
881
955
|
self.project_id,
|
@@ -922,6 +996,33 @@ class DiscoveryEngineClient:
|
|
922
996
|
log.info(f"No results {search_request.data_store_specs=}: {str(e)}")
|
923
997
|
return None
|
924
998
|
|
999
|
+
# Apply max_limit if needed
|
1000
|
+
if content_search_spec_type=="documents" and max_limit is not None:
|
1001
|
+
# For raw response objects (when parse_chunks_to_string=False)
|
1002
|
+
if not parse_chunks_to_string:
|
1003
|
+
# We need to limit the pager results before returning
|
1004
|
+
limited_response = search_response
|
1005
|
+
# Store the original pages iterator method
|
1006
|
+
original_pages = limited_response.pages
|
1007
|
+
|
1008
|
+
# Override the pages property with a custom iterator that respects max_limit
|
1009
|
+
async def limited_pages_iterator():
|
1010
|
+
results_count = 0
|
1011
|
+
async for page in original_pages:
|
1012
|
+
yield page
|
1013
|
+
|
1014
|
+
# Count results in this page
|
1015
|
+
if hasattr(page, 'results'):
|
1016
|
+
results_count += len(page.results)
|
1017
|
+
|
1018
|
+
# Stop if we've reached max_limit
|
1019
|
+
if results_count >= max_limit:
|
1020
|
+
break
|
1021
|
+
|
1022
|
+
# Replace the pages property with our custom iterator
|
1023
|
+
limited_response.pages = limited_pages_iterator()
|
1024
|
+
return limited_response
|
1025
|
+
|
925
1026
|
if parse_chunks_to_string:
|
926
1027
|
if content_search_spec_type=="chunks":
|
927
1028
|
if parse_chunks_to_string:
|
@@ -931,7 +1032,7 @@ class DiscoveryEngineClient:
|
|
931
1032
|
return big_string
|
932
1033
|
|
933
1034
|
elif content_search_spec_type=="documents":
|
934
|
-
big_string = await self.async_process_documents(search_response)
|
1035
|
+
big_string = await self.async_process_documents(search_response, max_limit=max_limit)
|
935
1036
|
log.info(f"Discovery engine documents string sample: {big_string[:100]}")
|
936
1037
|
|
937
1038
|
return big_string
|
@@ -75,7 +75,7 @@ sunholo/discovery_engine/__init__.py,sha256=hLgqRDJ22Aov9o2QjAEfsVgnL3kMdM-g5p8R
|
|
75
75
|
sunholo/discovery_engine/chunker_handler.py,sha256=wkvXl4rFtYfN6AZUKdW9_QD49Whf77BukDbO82UwlAg,7480
|
76
76
|
sunholo/discovery_engine/cli.py,sha256=tsKqNSDCEsDTz5-wuNwjttb3Xt35D97-KyyEiaqolMQ,35628
|
77
77
|
sunholo/discovery_engine/create_new.py,sha256=WUi4_xh_dFaGX3xA9jkNKZhaR6LCELjMPeRb0hyj4FU,1226
|
78
|
-
sunholo/discovery_engine/discovery_engine_client.py,sha256
|
78
|
+
sunholo/discovery_engine/discovery_engine_client.py,sha256=-0vSF4vd26ihhj7_XkxZJ6TvUCJrThLxZ8lMSTe5vqs,63448
|
79
79
|
sunholo/discovery_engine/get_ai_search_chunks.py,sha256=I6Dt1CznqEvE7XIZ2PkLqopmjpO96iVEWJJqL5cJjOU,5554
|
80
80
|
sunholo/embedder/__init__.py,sha256=sI4N_CqgEVcrMDxXgxKp1FsfsB4FpjoXgPGkl4N_u4I,44
|
81
81
|
sunholo/embedder/embed_chunk.py,sha256=did2pKkWM2o0KkRcb0H9l2x_WjCq6OyuHDxGbITFKPM,6530
|
@@ -168,9 +168,9 @@ sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
|
|
168
168
|
sunholo/vertex/memory_tools.py,sha256=tBZxqVZ4InTmdBvLlOYwoSEWu4-kGquc-gxDwZCC4FA,7667
|
169
169
|
sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
|
170
170
|
sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
|
171
|
-
sunholo-0.
|
172
|
-
sunholo-0.
|
173
|
-
sunholo-0.
|
174
|
-
sunholo-0.
|
175
|
-
sunholo-0.
|
176
|
-
sunholo-0.
|
171
|
+
sunholo-0.136.1.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
|
172
|
+
sunholo-0.136.1.dist-info/METADATA,sha256=2aeqbpV_AQcKkI1c_WUgZh63q6a7ZMNaAbk5C9fSzAY,10067
|
173
|
+
sunholo-0.136.1.dist-info/WHEEL,sha256=ooBFpIzZCPdw3uqIQsOo4qqbA4ZRPxHnOH7peeONza0,91
|
174
|
+
sunholo-0.136.1.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
|
175
|
+
sunholo-0.136.1.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
|
176
|
+
sunholo-0.136.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|