sunholo 0.135.0__py3-none-any.whl → 0.136.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -374,7 +374,8 @@ class DiscoveryEngineClient:
374
374
  parse_documents_to_string: bool = True,
375
375
  serving_config: str = "default_serving_config",
376
376
  data_store_ids: Optional[List[str]] = None,
377
- filter_str:str=None
377
+ filter_str:str=None,
378
+ max_limit:int=None
378
379
  ):
379
380
  """Retrieves entire documents based on a query.
380
381
 
@@ -405,7 +406,8 @@ class DiscoveryEngineClient:
405
406
  parse_chunks_to_string=parse_documents_to_string,
406
407
  serving_config=serving_config,
407
408
  data_store_ids=data_store_ids,
408
- content_search_spec_type="documents"
409
+ content_search_spec_type="documents",
410
+ max_limit=max_limit
409
411
  )
410
412
 
411
413
  async def async_get_documents(
@@ -415,7 +417,9 @@ class DiscoveryEngineClient:
415
417
  parse_documents_to_string: bool = True,
416
418
  serving_config: str = "default_serving_config",
417
419
  data_store_ids: Optional[List[str]] = None,
418
- filter_str:str=None
420
+ filter_str:str=None,
421
+ max_limit:int=None
422
+
419
423
  ):
420
424
  """Asynchronously retrieves entire documents based on a query.
421
425
 
@@ -439,7 +443,9 @@ class DiscoveryEngineClient:
439
443
  parse_chunks_to_string=parse_documents_to_string,
440
444
  serving_config=serving_config,
441
445
  data_store_ids=data_store_ids,
442
- content_search_spec_type="documents"
446
+ content_search_spec_type="documents",
447
+ max_limit=max_limit
448
+
443
449
  )
444
450
 
445
451
  def document_format(self, document):
@@ -476,44 +482,79 @@ class DiscoveryEngineClient:
476
482
  f"{derived_data}"
477
483
  )
478
484
 
479
- def process_documents(self, response):
485
+ def process_documents(self, response, max_limit:int=None):
480
486
  """Process a search response containing documents into a formatted string."""
481
487
  all_documents = []
482
-
488
+ result_count = 0
483
489
  # Check if the response contains results
484
- if not hasattr(response, 'results') or not response.results:
490
+ if not response or not hasattr(response, 'results') or not response.results:
485
491
  log.info(f'No results found in response: {response=}')
486
492
  return []
487
493
 
488
- # Iterate through each result in the response
489
- for result in response.results:
490
- if hasattr(result, 'document'):
491
- document = result.document
492
- all_documents.append(self.document_format(document))
493
- else:
494
- log.warning("No document found in result")
494
+ should_break=False
495
+ # Process the pager properly
496
+ for page in response.pages:
497
+ if should_break:
498
+ break
499
+ if hasattr(page, 'results') and page.results:
500
+ for result in page.results:
501
+ if result_count >= max_limit:
502
+ log.info("Breaking results loop as max limit reached")
503
+ should_break = True # Set flag to break outer loop
504
+ break
505
+
506
+ if hasattr(result, 'document'):
507
+ document = result.document
508
+ all_documents.append(self.document_format(document))
509
+ result_count += 1
510
+
511
+ # Check if we've reached max_limit
512
+ if max_limit is not None and result_count >= max_limit:
513
+ log.info(f"Reached max_limit of {max_limit} results, stopping processing")
514
+ should_break = True
515
+ break
516
+ else:
517
+ log.warning("No document found in result")
495
518
 
496
519
  # Combine all documents into one long string
497
520
  result_string = "\n\n".join(all_documents)
498
521
 
499
522
  return result_string
500
523
 
501
- async def async_process_documents(self, response):
524
+ async def async_process_documents(self, response, max_limit:int=None):
502
525
  """Process a search response containing documents into a formatted string asynchronously."""
503
526
  all_documents = []
527
+ result_count = 0
504
528
 
505
529
  # Check if the response contains results
506
- if not hasattr(response, 'results') or not response.results:
530
+ if not response or not hasattr(response, 'results') or not response.results:
507
531
  log.info(f'No results found in response: {response=}')
508
532
  return []
509
-
510
- # Iterate through each result in the response
511
- for result in response.results:
512
- if hasattr(result, 'document'):
513
- document = result.document
514
- all_documents.append(self.document_format(document))
515
- else:
516
- log.warning("No document found in result")
533
+
534
+ should_break=False
535
+ # Process the pager properly
536
+ async for page in response.pages:
537
+ if should_break:
538
+ break
539
+ if hasattr(page, 'results') and page.results:
540
+ for result in page.results:
541
+ if result_count >= max_limit:
542
+ log.info("Breaking results loop as max limit reached")
543
+ should_break = True # Set flag to break outer loop
544
+ break
545
+
546
+ if hasattr(result, 'document'):
547
+ document = result.document
548
+ all_documents.append(self.document_format(document))
549
+ result_count += 1
550
+
551
+ # Check if we've reached max_limit
552
+ if max_limit is not None and result_count >= max_limit:
553
+ log.info(f"Reached max_limit of {max_limit} results, stopping processing")
554
+ should_break = True
555
+ break
556
+ else:
557
+ log.warning("No document found in result")
517
558
 
518
559
  # Combine all documents into one long string
519
560
  result_string = "\n\n".join(all_documents)
@@ -781,7 +822,8 @@ class DiscoveryEngineClient:
781
822
  page_size=10, parse_chunks_to_string=True,
782
823
  serving_config="default_serving_config",
783
824
  data_store_ids: Optional[List[str]] = None,
784
- content_search_spec_type="chunks"):
825
+ content_search_spec_type="chunks",
826
+ max_limit=None):
785
827
  """
786
828
  Searches with a generic filter string.
787
829
 
@@ -793,6 +835,8 @@ class DiscoveryEngineClient:
793
835
  Returns:
794
836
  discoveryengine.SearchResponse or str: The search response object or string of chunks.
795
837
  """
838
+ if max_limit is not None and max_limit < page_size:
839
+ page_size = max_limit
796
840
 
797
841
  serving_config_path = self.search_client.serving_config_path(
798
842
  self.project_id,
@@ -840,6 +884,33 @@ class DiscoveryEngineClient:
840
884
  except Exception as e:
841
885
  log.info(f"No results {search_request.data_store_specs=}: {str(e)}")
842
886
  return None
887
+
888
+ # Apply max_limit if needed
889
+ if content_search_spec_type=="documents" and max_limit is not None:
890
+ # For raw response objects (when parse_chunks_to_string=False)
891
+ if not parse_chunks_to_string:
892
+ # We need to limit the pager results before returning
893
+ limited_response = search_response
894
+ # Store the original pages iterator method
895
+ original_pages = limited_response.pages
896
+
897
+ # Override the pages property with a custom iterator that respects max_limit
898
+ def limited_pages_iterator():
899
+ results_count = 0
900
+ for page in original_pages:
901
+ yield page
902
+
903
+ # Count results in this page
904
+ if hasattr(page, 'results'):
905
+ results_count += len(page.results)
906
+
907
+ # Stop if we've reached max_limit
908
+ if results_count >= max_limit:
909
+ break
910
+
911
+ # Replace the pages property with our custom iterator
912
+ limited_response.pages = limited_pages_iterator()
913
+ return limited_response
843
914
 
844
915
  if parse_chunks_to_string:
845
916
  if content_search_spec_type=="chunks":
@@ -850,7 +921,7 @@ class DiscoveryEngineClient:
850
921
  return big_string
851
922
 
852
923
  elif content_search_spec_type=="documents":
853
- big_string = self.process_documents(search_response)
924
+ big_string = self.process_documents(search_response, max_limit=max_limit)
854
925
  log.info(f"Discovery engine documents string sample: {big_string[:100]}")
855
926
 
856
927
  return big_string
@@ -864,7 +935,8 @@ class DiscoveryEngineClient:
864
935
  page_size=10, parse_chunks_to_string=True,
865
936
  serving_config="default_serving_config",
866
937
  data_store_ids: Optional[List[str]] = None,
867
- content_search_spec_type="chunks"):
938
+ content_search_spec_type="chunks",
939
+ max_limit=None):
868
940
  """
869
941
  Searches with a generic filter string asynchronously.
870
942
 
@@ -876,6 +948,8 @@ class DiscoveryEngineClient:
876
948
  Returns:
877
949
  discoveryengine.SearchResponse or str: The search response object or string of chunks.
878
950
  """
951
+ if max_limit is not None and max_limit < page_size:
952
+ page_size = max_limit
879
953
 
880
954
  serving_config_path = self.async_search_client.serving_config_path(
881
955
  self.project_id,
@@ -922,6 +996,33 @@ class DiscoveryEngineClient:
922
996
  log.info(f"No results {search_request.data_store_specs=}: {str(e)}")
923
997
  return None
924
998
 
999
+ # Apply max_limit if needed
1000
+ if content_search_spec_type=="documents" and max_limit is not None:
1001
+ # For raw response objects (when parse_chunks_to_string=False)
1002
+ if not parse_chunks_to_string:
1003
+ # We need to limit the pager results before returning
1004
+ limited_response = search_response
1005
+ # Store the original pages iterator method
1006
+ original_pages = limited_response.pages
1007
+
1008
+ # Override the pages property with a custom iterator that respects max_limit
1009
+ async def limited_pages_iterator():
1010
+ results_count = 0
1011
+ async for page in original_pages:
1012
+ yield page
1013
+
1014
+ # Count results in this page
1015
+ if hasattr(page, 'results'):
1016
+ results_count += len(page.results)
1017
+
1018
+ # Stop if we've reached max_limit
1019
+ if results_count >= max_limit:
1020
+ break
1021
+
1022
+ # Replace the pages property with our custom iterator
1023
+ limited_response.pages = limited_pages_iterator()
1024
+ return limited_response
1025
+
925
1026
  if parse_chunks_to_string:
926
1027
  if content_search_spec_type=="chunks":
927
1028
  if parse_chunks_to_string:
@@ -931,7 +1032,7 @@ class DiscoveryEngineClient:
931
1032
  return big_string
932
1033
 
933
1034
  elif content_search_spec_type=="documents":
934
- big_string = await self.async_process_documents(search_response)
1035
+ big_string = await self.async_process_documents(search_response, max_limit=max_limit)
935
1036
  log.info(f"Discovery engine documents string sample: {big_string[:100]}")
936
1037
 
937
1038
  return big_string
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sunholo
3
- Version: 0.135.0
3
+ Version: 0.136.1
4
4
  Summary: AI DevOps - a package to help deploy GenAI to the Cloud.
5
5
  Author-email: Holosun ApS <multivac@sunholo.com>
6
6
  License: Apache License, Version 2.0
@@ -75,7 +75,7 @@ sunholo/discovery_engine/__init__.py,sha256=hLgqRDJ22Aov9o2QjAEfsVgnL3kMdM-g5p8R
75
75
  sunholo/discovery_engine/chunker_handler.py,sha256=wkvXl4rFtYfN6AZUKdW9_QD49Whf77BukDbO82UwlAg,7480
76
76
  sunholo/discovery_engine/cli.py,sha256=tsKqNSDCEsDTz5-wuNwjttb3Xt35D97-KyyEiaqolMQ,35628
77
77
  sunholo/discovery_engine/create_new.py,sha256=WUi4_xh_dFaGX3xA9jkNKZhaR6LCELjMPeRb0hyj4FU,1226
78
- sunholo/discovery_engine/discovery_engine_client.py,sha256=Sf7Sr6FYKA_jn19Ba2ENShrB1jnZ4HgPScuytDIuK9c,58705
78
+ sunholo/discovery_engine/discovery_engine_client.py,sha256=-0vSF4vd26ihhj7_XkxZJ6TvUCJrThLxZ8lMSTe5vqs,63448
79
79
  sunholo/discovery_engine/get_ai_search_chunks.py,sha256=I6Dt1CznqEvE7XIZ2PkLqopmjpO96iVEWJJqL5cJjOU,5554
80
80
  sunholo/embedder/__init__.py,sha256=sI4N_CqgEVcrMDxXgxKp1FsfsB4FpjoXgPGkl4N_u4I,44
81
81
  sunholo/embedder/embed_chunk.py,sha256=did2pKkWM2o0KkRcb0H9l2x_WjCq6OyuHDxGbITFKPM,6530
@@ -168,9 +168,9 @@ sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
168
168
  sunholo/vertex/memory_tools.py,sha256=tBZxqVZ4InTmdBvLlOYwoSEWu4-kGquc-gxDwZCC4FA,7667
169
169
  sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
170
170
  sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
171
- sunholo-0.135.0.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
172
- sunholo-0.135.0.dist-info/METADATA,sha256=jLaY76jTW-W8S-9V7_9THZZ3-FroKO6HHoFKvloXXPI,10067
173
- sunholo-0.135.0.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
174
- sunholo-0.135.0.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
175
- sunholo-0.135.0.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
176
- sunholo-0.135.0.dist-info/RECORD,,
171
+ sunholo-0.136.1.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
172
+ sunholo-0.136.1.dist-info/METADATA,sha256=2aeqbpV_AQcKkI1c_WUgZh63q6a7ZMNaAbk5C9fSzAY,10067
173
+ sunholo-0.136.1.dist-info/WHEEL,sha256=ooBFpIzZCPdw3uqIQsOo4qqbA4ZRPxHnOH7peeONza0,91
174
+ sunholo-0.136.1.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
175
+ sunholo-0.136.1.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
176
+ sunholo-0.136.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (79.0.1)
2
+ Generator: setuptools (80.0.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5