sunholo 0.135.0__py3-none-any.whl → 0.136.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -374,7 +374,8 @@ class DiscoveryEngineClient:
374
374
  parse_documents_to_string: bool = True,
375
375
  serving_config: str = "default_serving_config",
376
376
  data_store_ids: Optional[List[str]] = None,
377
- filter_str:str=None
377
+ filter_str:str=None,
378
+ max_limit:int=None
378
379
  ):
379
380
  """Retrieves entire documents based on a query.
380
381
 
@@ -405,7 +406,8 @@ class DiscoveryEngineClient:
405
406
  parse_chunks_to_string=parse_documents_to_string,
406
407
  serving_config=serving_config,
407
408
  data_store_ids=data_store_ids,
408
- content_search_spec_type="documents"
409
+ content_search_spec_type="documents",
410
+ max_limit=max_limit
409
411
  )
410
412
 
411
413
  async def async_get_documents(
@@ -415,7 +417,9 @@ class DiscoveryEngineClient:
415
417
  parse_documents_to_string: bool = True,
416
418
  serving_config: str = "default_serving_config",
417
419
  data_store_ids: Optional[List[str]] = None,
418
- filter_str:str=None
420
+ filter_str:str=None,
421
+ max_limit:int=None
422
+
419
423
  ):
420
424
  """Asynchronously retrieves entire documents based on a query.
421
425
 
@@ -439,7 +443,9 @@ class DiscoveryEngineClient:
439
443
  parse_chunks_to_string=parse_documents_to_string,
440
444
  serving_config=serving_config,
441
445
  data_store_ids=data_store_ids,
442
- content_search_spec_type="documents"
446
+ content_search_spec_type="documents",
447
+ max_limit=max_limit
448
+
443
449
  )
444
450
 
445
451
  def document_format(self, document):
@@ -476,20 +482,26 @@ class DiscoveryEngineClient:
476
482
  f"{derived_data}"
477
483
  )
478
484
 
479
- def process_documents(self, response):
485
+ def process_documents(self, response, max_limit:int=None):
480
486
  """Process a search response containing documents into a formatted string."""
481
487
  all_documents = []
482
-
488
+ result_count = 0
483
489
  # Check if the response contains results
484
490
  if not hasattr(response, 'results') or not response.results:
485
491
  log.info(f'No results found in response: {response=}')
486
492
  return []
487
493
 
488
- # Iterate through each result in the response
494
+ # Iterate through each result in the page
489
495
  for result in response.results:
490
496
  if hasattr(result, 'document'):
491
497
  document = result.document
492
498
  all_documents.append(self.document_format(document))
499
+ result_count += 1
500
+
501
+ # Check if we've reached max_limit
502
+ if max_limit is not None and result_count >= max_limit:
503
+ log.info(f"Reached max_limit of {max_limit} results, stopping processing")
504
+ break
493
505
  else:
494
506
  log.warning("No document found in result")
495
507
 
@@ -498,20 +510,27 @@ class DiscoveryEngineClient:
498
510
 
499
511
  return result_string
500
512
 
501
- async def async_process_documents(self, response):
513
+ async def async_process_documents(self, response, max_limit:int=None):
502
514
  """Process a search response containing documents into a formatted string asynchronously."""
503
515
  all_documents = []
516
+ result_count = 0
504
517
 
505
518
  # Check if the response contains results
506
519
  if not hasattr(response, 'results') or not response.results:
507
520
  log.info(f'No results found in response: {response=}')
508
521
  return []
509
522
 
510
- # Iterate through each result in the response
523
+ # Iterate through each result in the page
511
524
  for result in response.results:
512
525
  if hasattr(result, 'document'):
513
526
  document = result.document
514
527
  all_documents.append(self.document_format(document))
528
+ result_count += 1
529
+
530
+ # Check if we've reached max_limit
531
+ if max_limit is not None and result_count >= max_limit:
532
+ log.info(f"Reached max_limit of {max_limit} results, stopping processing")
533
+ break
515
534
  else:
516
535
  log.warning("No document found in result")
517
536
 
@@ -781,7 +800,8 @@ class DiscoveryEngineClient:
781
800
  page_size=10, parse_chunks_to_string=True,
782
801
  serving_config="default_serving_config",
783
802
  data_store_ids: Optional[List[str]] = None,
784
- content_search_spec_type="chunks"):
803
+ content_search_spec_type="chunks",
804
+ max_limit=None):
785
805
  """
786
806
  Searches with a generic filter string.
787
807
 
@@ -793,6 +813,8 @@ class DiscoveryEngineClient:
793
813
  Returns:
794
814
  discoveryengine.SearchResponse or str: The search response object or string of chunks.
795
815
  """
816
+ if max_limit is not None and max_limit < page_size:
817
+ page_size = max_limit
796
818
 
797
819
  serving_config_path = self.search_client.serving_config_path(
798
820
  self.project_id,
@@ -840,6 +862,33 @@ class DiscoveryEngineClient:
840
862
  except Exception as e:
841
863
  log.info(f"No results {search_request.data_store_specs=}: {str(e)}")
842
864
  return None
865
+
866
+ # Apply max_limit if needed
867
+ if content_search_spec_type=="documents" and max_limit is not None:
868
+ # For raw response objects (when parse_chunks_to_string=False)
869
+ if not parse_chunks_to_string:
870
+ # We need to limit the pager results before returning
871
+ limited_response = search_response
872
+ # Store the original pages iterator method
873
+ original_pages = limited_response.pages
874
+
875
+ # Override the pages property with a custom iterator that respects max_limit
876
+ def limited_pages_iterator():
877
+ results_count = 0
878
+ for page in original_pages:
879
+ yield page
880
+
881
+ # Count results in this page
882
+ if hasattr(page, 'results'):
883
+ results_count += len(page.results)
884
+
885
+ # Stop if we've reached max_limit
886
+ if results_count >= max_limit:
887
+ break
888
+
889
+ # Replace the pages property with our custom iterator
890
+ limited_response.pages = limited_pages_iterator()
891
+ return limited_response
843
892
 
844
893
  if parse_chunks_to_string:
845
894
  if content_search_spec_type=="chunks":
@@ -850,7 +899,7 @@ class DiscoveryEngineClient:
850
899
  return big_string
851
900
 
852
901
  elif content_search_spec_type=="documents":
853
- big_string = self.process_documents(search_response)
902
+ big_string = self.process_documents(search_response, max_limit=max_limit)
854
903
  log.info(f"Discovery engine documents string sample: {big_string[:100]}")
855
904
 
856
905
  return big_string
@@ -864,7 +913,8 @@ class DiscoveryEngineClient:
864
913
  page_size=10, parse_chunks_to_string=True,
865
914
  serving_config="default_serving_config",
866
915
  data_store_ids: Optional[List[str]] = None,
867
- content_search_spec_type="chunks"):
916
+ content_search_spec_type="chunks",
917
+ max_limit=None):
868
918
  """
869
919
  Searches with a generic filter string asynchronously.
870
920
 
@@ -876,6 +926,8 @@ class DiscoveryEngineClient:
876
926
  Returns:
877
927
  discoveryengine.SearchResponse or str: The search response object or string of chunks.
878
928
  """
929
+ if max_limit is not None and max_limit < page_size:
930
+ page_size = max_limit
879
931
 
880
932
  serving_config_path = self.async_search_client.serving_config_path(
881
933
  self.project_id,
@@ -922,6 +974,33 @@ class DiscoveryEngineClient:
922
974
  log.info(f"No results {search_request.data_store_specs=}: {str(e)}")
923
975
  return None
924
976
 
977
+ # Apply max_limit if needed
978
+ if content_search_spec_type=="documents" and max_limit is not None:
979
+ # For raw response objects (when parse_chunks_to_string=False)
980
+ if not parse_chunks_to_string:
981
+ # We need to limit the pager results before returning
982
+ limited_response = search_response
983
+ # Store the original pages iterator method
984
+ original_pages = limited_response.pages
985
+
986
+ # Override the pages property with a custom iterator that respects max_limit
987
+ async def limited_pages_iterator():
988
+ results_count = 0
989
+ async for page in original_pages:
990
+ yield page
991
+
992
+ # Count results in this page
993
+ if hasattr(page, 'results'):
994
+ results_count += len(page.results)
995
+
996
+ # Stop if we've reached max_limit
997
+ if results_count >= max_limit:
998
+ break
999
+
1000
+ # Replace the pages property with our custom iterator
1001
+ limited_response.pages = limited_pages_iterator()
1002
+ return limited_response
1003
+
925
1004
  if parse_chunks_to_string:
926
1005
  if content_search_spec_type=="chunks":
927
1006
  if parse_chunks_to_string:
@@ -931,7 +1010,7 @@ class DiscoveryEngineClient:
931
1010
  return big_string
932
1011
 
933
1012
  elif content_search_spec_type=="documents":
934
- big_string = await self.async_process_documents(search_response)
1013
+ big_string = await self.async_process_documents(search_response, max_limit=max_limit)
935
1014
  log.info(f"Discovery engine documents string sample: {big_string[:100]}")
936
1015
 
937
1016
  return big_string
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sunholo
3
- Version: 0.135.0
3
+ Version: 0.136.0
4
4
  Summary: AI DevOps - a package to help deploy GenAI to the Cloud.
5
5
  Author-email: Holosun ApS <multivac@sunholo.com>
6
6
  License: Apache License, Version 2.0
@@ -75,7 +75,7 @@ sunholo/discovery_engine/__init__.py,sha256=hLgqRDJ22Aov9o2QjAEfsVgnL3kMdM-g5p8R
75
75
  sunholo/discovery_engine/chunker_handler.py,sha256=wkvXl4rFtYfN6AZUKdW9_QD49Whf77BukDbO82UwlAg,7480
76
76
  sunholo/discovery_engine/cli.py,sha256=tsKqNSDCEsDTz5-wuNwjttb3Xt35D97-KyyEiaqolMQ,35628
77
77
  sunholo/discovery_engine/create_new.py,sha256=WUi4_xh_dFaGX3xA9jkNKZhaR6LCELjMPeRb0hyj4FU,1226
78
- sunholo/discovery_engine/discovery_engine_client.py,sha256=Sf7Sr6FYKA_jn19Ba2ENShrB1jnZ4HgPScuytDIuK9c,58705
78
+ sunholo/discovery_engine/discovery_engine_client.py,sha256=xOcOj7D49bh7aKvDM1KujrFxu8dhgbFugSJv-fUPjt0,62444
79
79
  sunholo/discovery_engine/get_ai_search_chunks.py,sha256=I6Dt1CznqEvE7XIZ2PkLqopmjpO96iVEWJJqL5cJjOU,5554
80
80
  sunholo/embedder/__init__.py,sha256=sI4N_CqgEVcrMDxXgxKp1FsfsB4FpjoXgPGkl4N_u4I,44
81
81
  sunholo/embedder/embed_chunk.py,sha256=did2pKkWM2o0KkRcb0H9l2x_WjCq6OyuHDxGbITFKPM,6530
@@ -168,9 +168,9 @@ sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
168
168
  sunholo/vertex/memory_tools.py,sha256=tBZxqVZ4InTmdBvLlOYwoSEWu4-kGquc-gxDwZCC4FA,7667
169
169
  sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
170
170
  sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
171
- sunholo-0.135.0.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
172
- sunholo-0.135.0.dist-info/METADATA,sha256=jLaY76jTW-W8S-9V7_9THZZ3-FroKO6HHoFKvloXXPI,10067
173
- sunholo-0.135.0.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
174
- sunholo-0.135.0.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
175
- sunholo-0.135.0.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
176
- sunholo-0.135.0.dist-info/RECORD,,
171
+ sunholo-0.136.0.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
172
+ sunholo-0.136.0.dist-info/METADATA,sha256=VeoffpbDPqtye_pewu_LASUZo-79qT6Rpb4UjrG1TQ8,10067
173
+ sunholo-0.136.0.dist-info/WHEEL,sha256=ooBFpIzZCPdw3uqIQsOo4qqbA4ZRPxHnOH7peeONza0,91
174
+ sunholo-0.136.0.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
175
+ sunholo-0.136.0.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
176
+ sunholo-0.136.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (79.0.1)
2
+ Generator: setuptools (80.0.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5