PyPI - sunholo - Versions diffs - 0.135.0__py3-none-any.whl → 0.136.1__py3-none-any.whl - Mend

sunholo 0.135.0__py3-none-any.whl → 0.136.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

sunholo/discovery_engine/discovery_engine_client.py CHANGED Viewed

@@ -374,7 +374,8 @@ class DiscoveryEngineClient:
         parse_documents_to_string: bool = True,
         serving_config: str = "default_serving_config",
         data_store_ids: Optional[List[str]] = None,
-        filter_str:str=None
+        filter_str:str=None,
+        max_limit:int=None
     ):
         """Retrieves entire documents based on a query.
@@ -405,7 +406,8 @@ class DiscoveryEngineClient:
             parse_chunks_to_string=parse_documents_to_string,
             serving_config=serving_config,
             data_store_ids=data_store_ids,
-            content_search_spec_type="documents"
+            content_search_spec_type="documents",
+            max_limit=max_limit
         )
     async def async_get_documents(
@@ -415,7 +417,9 @@ class DiscoveryEngineClient:
         parse_documents_to_string: bool = True,
         serving_config: str = "default_serving_config",
         data_store_ids: Optional[List[str]] = None,
-        filter_str:str=None
+        filter_str:str=None,
+        max_limit:int=None
     ):
         """Asynchronously retrieves entire documents based on a query.
@@ -439,7 +443,9 @@ class DiscoveryEngineClient:
             parse_chunks_to_string=parse_documents_to_string,
             serving_config=serving_config,
             data_store_ids=data_store_ids,
-            content_search_spec_type="documents"
+            content_search_spec_type="documents",
+            max_limit=max_limit
         )
     def document_format(self, document):
@@ -476,44 +482,79 @@ class DiscoveryEngineClient:
             f"{derived_data}"
         )
-    def process_documents(self, response):
+    def process_documents(self, response, max_limit:int=None):
         """Process a search response containing documents into a formatted string."""
         all_documents = []
+        result_count = 0
         # Check if the response contains results
-        if not hasattr(response, 'results') or not response.results:
+        if not response or not hasattr(response, 'results') or not response.results:
             log.info(f'No results found in response: {response=}')
             return []
-        # Iterate through each result in the response
-        for result in response.results:
-            if hasattr(result, 'document'):
-                document = result.document
-                all_documents.append(self.document_format(document))
-            else:
-                log.warning("No document found in result")
+        should_break=False
+        # Process the pager properly
+        for page in response.pages:
+            if should_break:
+                break
+            if hasattr(page, 'results') and page.results:
+                for result in page.results:
+                    if result_count >= max_limit:
+                        log.info("Breaking results loop as max limit reached")
+                        should_break = True  # Set flag to break outer loop
+                        break
+                if hasattr(result, 'document'):
+                    document = result.document
+                    all_documents.append(self.document_format(document))
+                    result_count += 1
+                    # Check if we've reached max_limit
+                    if max_limit is not None and result_count >= max_limit:
+                        log.info(f"Reached max_limit of {max_limit} results, stopping processing")
+                        should_break = True
+                        break
+                else:
+                    log.warning("No document found in result")
         # Combine all documents into one long string
         result_string = "\n\n".join(all_documents)
         return result_string
-    async def async_process_documents(self, response):
+    async def async_process_documents(self, response, max_limit:int=None):
         """Process a search response containing documents into a formatted string asynchronously."""
         all_documents = []
+        result_count = 0
         # Check if the response contains results
-        if not hasattr(response, 'results') or not response.results:
+        if not response or not hasattr(response, 'results') or not response.results:
             log.info(f'No results found in response: {response=}')
             return []
-        # Iterate through each result in the response
-        for result in response.results:
-            if hasattr(result, 'document'):
-                document = result.document
-                all_documents.append(self.document_format(document))
-            else:
-                log.warning("No document found in result")
+        should_break=False
+        # Process the pager properly
+        async for page in response.pages:
+            if should_break:
+                break
+            if hasattr(page, 'results') and page.results:
+                for result in page.results:
+                    if result_count >= max_limit:
+                        log.info("Breaking results loop as max limit reached")
+                        should_break = True  # Set flag to break outer loop
+                        break
+                if hasattr(result, 'document'):
+                    document = result.document
+                    all_documents.append(self.document_format(document))
+                    result_count += 1
+                    # Check if we've reached max_limit
+                    if max_limit is not None and result_count >= max_limit:
+                        log.info(f"Reached max_limit of {max_limit} results, stopping processing")
+                        should_break = True
+                        break
+                else:
+                    log.warning("No document found in result")
         # Combine all documents into one long string
         result_string = "\n\n".join(all_documents)
@@ -781,7 +822,8 @@ class DiscoveryEngineClient:
                         page_size=10, parse_chunks_to_string=True,
                         serving_config="default_serving_config",
                         data_store_ids: Optional[List[str]] = None,
-                        content_search_spec_type="chunks"):
+                        content_search_spec_type="chunks",
+                        max_limit=None):
         """
         Searches with a generic filter string.
@@ -793,6 +835,8 @@ class DiscoveryEngineClient:
         Returns:
             discoveryengine.SearchResponse or str: The search response object or string of chunks.
         """
+        if max_limit is not None and max_limit < page_size:
+            page_size = max_limit
         serving_config_path = self.search_client.serving_config_path(
             self.project_id,
@@ -840,6 +884,33 @@ class DiscoveryEngineClient:
         except Exception as e:
             log.info(f"No results {search_request.data_store_specs=}: {str(e)}")
             return None
+        # Apply max_limit if needed
+        if content_search_spec_type=="documents" and max_limit is not None:
+            # For raw response objects (when parse_chunks_to_string=False)
+            if not parse_chunks_to_string:
+                # We need to limit the pager results before returning
+                limited_response = search_response
+                # Store the original pages iterator method
+                original_pages = limited_response.pages
+                # Override the pages property with a custom iterator that respects max_limit
+                def limited_pages_iterator():
+                    results_count = 0
+                    for page in original_pages:
+                        yield page
+                        # Count results in this page
+                        if hasattr(page, 'results'):
+                            results_count += len(page.results)
+                        # Stop if we've reached max_limit
+                        if results_count >= max_limit:
+                            break
+                # Replace the pages property with our custom iterator
+                limited_response.pages = limited_pages_iterator()
+                return limited_response
         if parse_chunks_to_string:
             if content_search_spec_type=="chunks":
@@ -850,7 +921,7 @@ class DiscoveryEngineClient:
                     return big_string
             elif content_search_spec_type=="documents":
-                big_string = self.process_documents(search_response)
+                big_string = self.process_documents(search_response, max_limit=max_limit)
                 log.info(f"Discovery engine documents string sample: {big_string[:100]}")
                 return big_string
@@ -864,7 +935,8 @@ class DiscoveryEngineClient:
                             page_size=10, parse_chunks_to_string=True,
                             serving_config="default_serving_config",
                             data_store_ids: Optional[List[str]] = None,
-                            content_search_spec_type="chunks"):
+                            content_search_spec_type="chunks",
+                            max_limit=None):
         """
         Searches with a generic filter string asynchronously.
@@ -876,6 +948,8 @@ class DiscoveryEngineClient:
         Returns:
             discoveryengine.SearchResponse or str: The search response object or string of chunks.
         """
+        if max_limit is not None and max_limit < page_size:
+            page_size = max_limit
         serving_config_path = self.async_search_client.serving_config_path(
             self.project_id,
@@ -922,6 +996,33 @@ class DiscoveryEngineClient:
             log.info(f"No results {search_request.data_store_specs=}: {str(e)}")
             return None
+        # Apply max_limit if needed
+        if content_search_spec_type=="documents" and max_limit is not None:
+            # For raw response objects (when parse_chunks_to_string=False)
+            if not parse_chunks_to_string:
+                # We need to limit the pager results before returning
+                limited_response = search_response
+                # Store the original pages iterator method
+                original_pages = limited_response.pages
+                # Override the pages property with a custom iterator that respects max_limit
+                async def limited_pages_iterator():
+                    results_count = 0
+                    async for page in original_pages:
+                        yield page
+                        # Count results in this page
+                        if hasattr(page, 'results'):
+                            results_count += len(page.results)
+                        # Stop if we've reached max_limit
+                        if results_count >= max_limit:
+                            break
+                # Replace the pages property with our custom iterator
+                limited_response.pages = limited_pages_iterator()
+                return limited_response
         if parse_chunks_to_string:
             if content_search_spec_type=="chunks":
                 if parse_chunks_to_string:
@@ -931,7 +1032,7 @@ class DiscoveryEngineClient:
                     return big_string
             elif content_search_spec_type=="documents":
-                big_string = await self.async_process_documents(search_response)
+                big_string = await self.async_process_documents(search_response, max_limit=max_limit)
                 log.info(f"Discovery engine documents string sample: {big_string[:100]}")
                 return big_string

{sunholo-0.135.0.dist-info → sunholo-0.136.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sunholo
-Version: 0.135.0
+Version: 0.136.1
 Summary: AI DevOps - a package to help deploy GenAI to the Cloud.
 Author-email: Holosun ApS <multivac@sunholo.com>
 License: Apache License, Version 2.0

{sunholo-0.135.0.dist-info → sunholo-0.136.1.dist-info}/RECORD RENAMED Viewed

@@ -75,7 +75,7 @@ sunholo/discovery_engine/__init__.py,sha256=hLgqRDJ22Aov9o2QjAEfsVgnL3kMdM-g5p8R
 sunholo/discovery_engine/chunker_handler.py,sha256=wkvXl4rFtYfN6AZUKdW9_QD49Whf77BukDbO82UwlAg,7480
 sunholo/discovery_engine/cli.py,sha256=tsKqNSDCEsDTz5-wuNwjttb3Xt35D97-KyyEiaqolMQ,35628
 sunholo/discovery_engine/create_new.py,sha256=WUi4_xh_dFaGX3xA9jkNKZhaR6LCELjMPeRb0hyj4FU,1226
-sunholo/discovery_engine/discovery_engine_client.py,sha256=Sf7Sr6FYKA_jn19Ba2ENShrB1jnZ4HgPScuytDIuK9c,58705
+sunholo/discovery_engine/discovery_engine_client.py,sha256=-0vSF4vd26ihhj7_XkxZJ6TvUCJrThLxZ8lMSTe5vqs,63448
 sunholo/discovery_engine/get_ai_search_chunks.py,sha256=I6Dt1CznqEvE7XIZ2PkLqopmjpO96iVEWJJqL5cJjOU,5554
 sunholo/embedder/__init__.py,sha256=sI4N_CqgEVcrMDxXgxKp1FsfsB4FpjoXgPGkl4N_u4I,44
 sunholo/embedder/embed_chunk.py,sha256=did2pKkWM2o0KkRcb0H9l2x_WjCq6OyuHDxGbITFKPM,6530
@@ -168,9 +168,9 @@ sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
 sunholo/vertex/memory_tools.py,sha256=tBZxqVZ4InTmdBvLlOYwoSEWu4-kGquc-gxDwZCC4FA,7667
 sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
 sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
-sunholo-0.135.0.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
-sunholo-0.135.0.dist-info/METADATA,sha256=jLaY76jTW-W8S-9V7_9THZZ3-FroKO6HHoFKvloXXPI,10067
-sunholo-0.135.0.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
-sunholo-0.135.0.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
-sunholo-0.135.0.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
-sunholo-0.135.0.dist-info/RECORD,,
+sunholo-0.136.1.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
+sunholo-0.136.1.dist-info/METADATA,sha256=2aeqbpV_AQcKkI1c_WUgZh63q6a7ZMNaAbk5C9fSzAY,10067
+sunholo-0.136.1.dist-info/WHEEL,sha256=ooBFpIzZCPdw3uqIQsOo4qqbA4ZRPxHnOH7peeONza0,91
+sunholo-0.136.1.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
+sunholo-0.136.1.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
+sunholo-0.136.1.dist-info/RECORD,,

{sunholo-0.135.0.dist-info → sunholo-0.136.1.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (79.0.1)
+Generator: setuptools (80.0.1)
 Root-Is-Purelib: true
 Tag: py3-none-any

{sunholo-0.135.0.dist-info → sunholo-0.136.1.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{sunholo-0.135.0.dist-info → sunholo-0.136.1.dist-info}/licenses/LICENSE.txt RENAMED Viewed

File without changes

{sunholo-0.135.0.dist-info → sunholo-0.136.1.dist-info}/top_level.txt RENAMED Viewed

File without changes

sunholo 0.135.0__py3-none-any.whl → 0.136.1__py3-none-any.whl

sunholo 0.135.0__py3-none-any.whl → 0.136.1__py3-none-any.whl