sunholo-0.135.0-py3-none-any.whl → sunholo-0.136.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sunholo/discovery_engine/discovery_engine_client.py +92 -13
- {sunholo-0.135.0.dist-info → sunholo-0.136.0.dist-info}/METADATA +1 -1
- {sunholo-0.135.0.dist-info → sunholo-0.136.0.dist-info}/RECORD +7 -7
- {sunholo-0.135.0.dist-info → sunholo-0.136.0.dist-info}/WHEEL +1 -1
- {sunholo-0.135.0.dist-info → sunholo-0.136.0.dist-info}/entry_points.txt +0 -0
- {sunholo-0.135.0.dist-info → sunholo-0.136.0.dist-info}/licenses/LICENSE.txt +0 -0
- {sunholo-0.135.0.dist-info → sunholo-0.136.0.dist-info}/top_level.txt +0 -0
@@ -374,7 +374,8 @@ class DiscoveryEngineClient:
|
|
374
374
|
parse_documents_to_string: bool = True,
|
375
375
|
serving_config: str = "default_serving_config",
|
376
376
|
data_store_ids: Optional[List[str]] = None,
|
377
|
-
filter_str:str=None
|
377
|
+
filter_str:str=None,
|
378
|
+
max_limit:int=None
|
378
379
|
):
|
379
380
|
"""Retrieves entire documents based on a query.
|
380
381
|
|
@@ -405,7 +406,8 @@ class DiscoveryEngineClient:
|
|
405
406
|
parse_chunks_to_string=parse_documents_to_string,
|
406
407
|
serving_config=serving_config,
|
407
408
|
data_store_ids=data_store_ids,
|
408
|
-
content_search_spec_type="documents"
|
409
|
+
content_search_spec_type="documents",
|
410
|
+
max_limit=max_limit
|
409
411
|
)
|
410
412
|
|
411
413
|
async def async_get_documents(
|
@@ -415,7 +417,9 @@ class DiscoveryEngineClient:
|
|
415
417
|
parse_documents_to_string: bool = True,
|
416
418
|
serving_config: str = "default_serving_config",
|
417
419
|
data_store_ids: Optional[List[str]] = None,
|
418
|
-
filter_str:str=None
|
420
|
+
filter_str:str=None,
|
421
|
+
max_limit:int=None
|
422
|
+
|
419
423
|
):
|
420
424
|
"""Asynchronously retrieves entire documents based on a query.
|
421
425
|
|
@@ -439,7 +443,9 @@ class DiscoveryEngineClient:
|
|
439
443
|
parse_chunks_to_string=parse_documents_to_string,
|
440
444
|
serving_config=serving_config,
|
441
445
|
data_store_ids=data_store_ids,
|
442
|
-
content_search_spec_type="documents"
|
446
|
+
content_search_spec_type="documents",
|
447
|
+
max_limit=max_limit
|
448
|
+
|
443
449
|
)
|
444
450
|
|
445
451
|
def document_format(self, document):
|
@@ -476,20 +482,26 @@ class DiscoveryEngineClient:
|
|
476
482
|
f"{derived_data}"
|
477
483
|
)
|
478
484
|
|
479
|
-
def process_documents(self, response):
|
485
|
+
def process_documents(self, response, max_limit:int=None):
|
480
486
|
"""Process a search response containing documents into a formatted string."""
|
481
487
|
all_documents = []
|
482
|
-
|
488
|
+
result_count = 0
|
483
489
|
# Check if the response contains results
|
484
490
|
if not hasattr(response, 'results') or not response.results:
|
485
491
|
log.info(f'No results found in response: {response=}')
|
486
492
|
return []
|
487
493
|
|
488
|
-
# Iterate through each result in the
|
494
|
+
# Iterate through each result in the page
|
489
495
|
for result in response.results:
|
490
496
|
if hasattr(result, 'document'):
|
491
497
|
document = result.document
|
492
498
|
all_documents.append(self.document_format(document))
|
499
|
+
result_count += 1
|
500
|
+
|
501
|
+
# Check if we've reached max_limit
|
502
|
+
if max_limit is not None and result_count >= max_limit:
|
503
|
+
log.info(f"Reached max_limit of {max_limit} results, stopping processing")
|
504
|
+
break
|
493
505
|
else:
|
494
506
|
log.warning("No document found in result")
|
495
507
|
|
@@ -498,20 +510,27 @@ class DiscoveryEngineClient:
|
|
498
510
|
|
499
511
|
return result_string
|
500
512
|
|
501
|
-
async def async_process_documents(self, response):
|
513
|
+
async def async_process_documents(self, response, max_limit:int=None):
|
502
514
|
"""Process a search response containing documents into a formatted string asynchronously."""
|
503
515
|
all_documents = []
|
516
|
+
result_count = 0
|
504
517
|
|
505
518
|
# Check if the response contains results
|
506
519
|
if not hasattr(response, 'results') or not response.results:
|
507
520
|
log.info(f'No results found in response: {response=}')
|
508
521
|
return []
|
509
522
|
|
510
|
-
# Iterate through each result in the
|
523
|
+
# Iterate through each result in the page
|
511
524
|
for result in response.results:
|
512
525
|
if hasattr(result, 'document'):
|
513
526
|
document = result.document
|
514
527
|
all_documents.append(self.document_format(document))
|
528
|
+
result_count += 1
|
529
|
+
|
530
|
+
# Check if we've reached max_limit
|
531
|
+
if max_limit is not None and result_count >= max_limit:
|
532
|
+
log.info(f"Reached max_limit of {max_limit} results, stopping processing")
|
533
|
+
break
|
515
534
|
else:
|
516
535
|
log.warning("No document found in result")
|
517
536
|
|
@@ -781,7 +800,8 @@ class DiscoveryEngineClient:
|
|
781
800
|
page_size=10, parse_chunks_to_string=True,
|
782
801
|
serving_config="default_serving_config",
|
783
802
|
data_store_ids: Optional[List[str]] = None,
|
784
|
-
content_search_spec_type="chunks"
|
803
|
+
content_search_spec_type="chunks",
|
804
|
+
max_limit=None):
|
785
805
|
"""
|
786
806
|
Searches with a generic filter string.
|
787
807
|
|
@@ -793,6 +813,8 @@ class DiscoveryEngineClient:
|
|
793
813
|
Returns:
|
794
814
|
discoveryengine.SearchResponse or str: The search response object or string of chunks.
|
795
815
|
"""
|
816
|
+
if max_limit is not None and max_limit < page_size:
|
817
|
+
page_size = max_limit
|
796
818
|
|
797
819
|
serving_config_path = self.search_client.serving_config_path(
|
798
820
|
self.project_id,
|
@@ -840,6 +862,33 @@ class DiscoveryEngineClient:
|
|
840
862
|
except Exception as e:
|
841
863
|
log.info(f"No results {search_request.data_store_specs=}: {str(e)}")
|
842
864
|
return None
|
865
|
+
|
866
|
+
# Apply max_limit if needed
|
867
|
+
if content_search_spec_type=="documents" and max_limit is not None:
|
868
|
+
# For raw response objects (when parse_chunks_to_string=False)
|
869
|
+
if not parse_chunks_to_string:
|
870
|
+
# We need to limit the pager results before returning
|
871
|
+
limited_response = search_response
|
872
|
+
# Store the original pages iterator method
|
873
|
+
original_pages = limited_response.pages
|
874
|
+
|
875
|
+
# Override the pages property with a custom iterator that respects max_limit
|
876
|
+
def limited_pages_iterator():
|
877
|
+
results_count = 0
|
878
|
+
for page in original_pages:
|
879
|
+
yield page
|
880
|
+
|
881
|
+
# Count results in this page
|
882
|
+
if hasattr(page, 'results'):
|
883
|
+
results_count += len(page.results)
|
884
|
+
|
885
|
+
# Stop if we've reached max_limit
|
886
|
+
if results_count >= max_limit:
|
887
|
+
break
|
888
|
+
|
889
|
+
# Replace the pages property with our custom iterator
|
890
|
+
limited_response.pages = limited_pages_iterator()
|
891
|
+
return limited_response
|
843
892
|
|
844
893
|
if parse_chunks_to_string:
|
845
894
|
if content_search_spec_type=="chunks":
|
@@ -850,7 +899,7 @@ class DiscoveryEngineClient:
|
|
850
899
|
return big_string
|
851
900
|
|
852
901
|
elif content_search_spec_type=="documents":
|
853
|
-
big_string = self.process_documents(search_response)
|
902
|
+
big_string = self.process_documents(search_response, max_limit=max_limit)
|
854
903
|
log.info(f"Discovery engine documents string sample: {big_string[:100]}")
|
855
904
|
|
856
905
|
return big_string
|
@@ -864,7 +913,8 @@ class DiscoveryEngineClient:
|
|
864
913
|
page_size=10, parse_chunks_to_string=True,
|
865
914
|
serving_config="default_serving_config",
|
866
915
|
data_store_ids: Optional[List[str]] = None,
|
867
|
-
content_search_spec_type="chunks"
|
916
|
+
content_search_spec_type="chunks",
|
917
|
+
max_limit=None):
|
868
918
|
"""
|
869
919
|
Searches with a generic filter string asynchronously.
|
870
920
|
|
@@ -876,6 +926,8 @@ class DiscoveryEngineClient:
|
|
876
926
|
Returns:
|
877
927
|
discoveryengine.SearchResponse or str: The search response object or string of chunks.
|
878
928
|
"""
|
929
|
+
if max_limit is not None and max_limit < page_size:
|
930
|
+
page_size = max_limit
|
879
931
|
|
880
932
|
serving_config_path = self.async_search_client.serving_config_path(
|
881
933
|
self.project_id,
|
@@ -922,6 +974,33 @@ class DiscoveryEngineClient:
|
|
922
974
|
log.info(f"No results {search_request.data_store_specs=}: {str(e)}")
|
923
975
|
return None
|
924
976
|
|
977
|
+
# Apply max_limit if needed
|
978
|
+
if content_search_spec_type=="documents" and max_limit is not None:
|
979
|
+
# For raw response objects (when parse_chunks_to_string=False)
|
980
|
+
if not parse_chunks_to_string:
|
981
|
+
# We need to limit the pager results before returning
|
982
|
+
limited_response = search_response
|
983
|
+
# Store the original pages iterator method
|
984
|
+
original_pages = limited_response.pages
|
985
|
+
|
986
|
+
# Override the pages property with a custom iterator that respects max_limit
|
987
|
+
async def limited_pages_iterator():
|
988
|
+
results_count = 0
|
989
|
+
async for page in original_pages:
|
990
|
+
yield page
|
991
|
+
|
992
|
+
# Count results in this page
|
993
|
+
if hasattr(page, 'results'):
|
994
|
+
results_count += len(page.results)
|
995
|
+
|
996
|
+
# Stop if we've reached max_limit
|
997
|
+
if results_count >= max_limit:
|
998
|
+
break
|
999
|
+
|
1000
|
+
# Replace the pages property with our custom iterator
|
1001
|
+
limited_response.pages = limited_pages_iterator()
|
1002
|
+
return limited_response
|
1003
|
+
|
925
1004
|
if parse_chunks_to_string:
|
926
1005
|
if content_search_spec_type=="chunks":
|
927
1006
|
if parse_chunks_to_string:
|
@@ -931,7 +1010,7 @@ class DiscoveryEngineClient:
|
|
931
1010
|
return big_string
|
932
1011
|
|
933
1012
|
elif content_search_spec_type=="documents":
|
934
|
-
big_string = await self.async_process_documents(search_response)
|
1013
|
+
big_string = await self.async_process_documents(search_response, max_limit=max_limit)
|
935
1014
|
log.info(f"Discovery engine documents string sample: {big_string[:100]}")
|
936
1015
|
|
937
1016
|
return big_string
|
@@ -75,7 +75,7 @@ sunholo/discovery_engine/__init__.py,sha256=hLgqRDJ22Aov9o2QjAEfsVgnL3kMdM-g5p8R
|
|
75
75
|
sunholo/discovery_engine/chunker_handler.py,sha256=wkvXl4rFtYfN6AZUKdW9_QD49Whf77BukDbO82UwlAg,7480
|
76
76
|
sunholo/discovery_engine/cli.py,sha256=tsKqNSDCEsDTz5-wuNwjttb3Xt35D97-KyyEiaqolMQ,35628
|
77
77
|
sunholo/discovery_engine/create_new.py,sha256=WUi4_xh_dFaGX3xA9jkNKZhaR6LCELjMPeRb0hyj4FU,1226
|
78
|
-
sunholo/discovery_engine/discovery_engine_client.py,sha256=
|
78
|
+
sunholo/discovery_engine/discovery_engine_client.py,sha256=xOcOj7D49bh7aKvDM1KujrFxu8dhgbFugSJv-fUPjt0,62444
|
79
79
|
sunholo/discovery_engine/get_ai_search_chunks.py,sha256=I6Dt1CznqEvE7XIZ2PkLqopmjpO96iVEWJJqL5cJjOU,5554
|
80
80
|
sunholo/embedder/__init__.py,sha256=sI4N_CqgEVcrMDxXgxKp1FsfsB4FpjoXgPGkl4N_u4I,44
|
81
81
|
sunholo/embedder/embed_chunk.py,sha256=did2pKkWM2o0KkRcb0H9l2x_WjCq6OyuHDxGbITFKPM,6530
|
@@ -168,9 +168,9 @@ sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
|
|
168
168
|
sunholo/vertex/memory_tools.py,sha256=tBZxqVZ4InTmdBvLlOYwoSEWu4-kGquc-gxDwZCC4FA,7667
|
169
169
|
sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
|
170
170
|
sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
|
171
|
-
sunholo-0.
|
172
|
-
sunholo-0.
|
173
|
-
sunholo-0.
|
174
|
-
sunholo-0.
|
175
|
-
sunholo-0.
|
176
|
-
sunholo-0.
|
171
|
+
sunholo-0.136.0.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
|
172
|
+
sunholo-0.136.0.dist-info/METADATA,sha256=VeoffpbDPqtye_pewu_LASUZo-79qT6Rpb4UjrG1TQ8,10067
|
173
|
+
sunholo-0.136.0.dist-info/WHEEL,sha256=ooBFpIzZCPdw3uqIQsOo4qqbA4ZRPxHnOH7peeONza0,91
|
174
|
+
sunholo-0.136.0.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
|
175
|
+
sunholo-0.136.0.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
|
176
|
+
sunholo-0.136.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|