PyPI - sunholo - Versions diffs - 0.134.2__py3-none-any.whl → 0.134.3__py3-none-any.whl - Mend

sunholo 0.134.2__py3-none-any.whl → 0.134.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

sunholo/discovery_engine/cli.py CHANGED Viewed

@@ -198,16 +198,28 @@ def search_command(args):
             data_store_id=args.data_store_id, # Target datastore
             location=args.location
         )
-        # This calls get_chunks which returns string or pager
-        results_data = client.get_chunks(
-            query=args.query,
-            # num_previous_chunks=args.num_previous_chunks, # Ensure these args are added to parser if needed
-            # num_next_chunks=args.num_next_chunks, # Ensure these args are added to parser if needed
-            page_size=args.page_size,
-            parse_chunks_to_string=args.parse_chunks_to_string,
-            serving_config=args.serving_config,
-            # data_store_ids=args.data_store_ids # Ensure these args are added to parser if needed
-        )
+        if args.content_search_spec_type == "chunks":
+            # This calls get_chunks which returns string or pager
+            results_data = client.get_chunks(
+                query=args.query,
+                # num_previous_chunks=args.num_previous_chunks, # Ensure these args are added to parser if needed
+                # num_next_chunks=args.num_next_chunks, # Ensure these args are added to parser if needed
+                page_size=args.page_size,
+                parse_chunks_to_string=args.parse_chunks_to_string,
+                serving_config=args.serving_config,
+                # data_store_ids=args.data_store_ids # Ensure these args are added to parser if needed
+            )
+        elif args.content_search_spec_type == "documents":
+            results_data = client.get_documents(
+                query=args.query,
+                page_size=args.page_size,
+                parse_documents_to_string=args.parse_chunks_to_string,
+                serving_config=args.serving_config,
+                # data_store_ids=args.data_store_ids # Ensure these args are added to parser if needed
+            )
+        else:
+            raise ValueError("Invalid content_search_spec_type. Must be 'chunks' or 'documents'.")
         if args.parse_chunks_to_string:
             console.print("\n[bold magenta]--- Combined Chunk String ---[/bold magenta]")
@@ -512,8 +524,9 @@ def setup_discovery_engine_subparser(subparsers):
     search_parser.add_argument('--query', required=True, help='The search query')
     search_parser.add_argument('--data-store-id', required=True, help='Data store ID to search')
     search_parser.add_argument('--page-size', type=int, default=10, help='Max results per page')
-    search_parser.add_argument('--parse-chunks-to-string', action='store_true', help='Output results as one formatted string')
+    search_parser.add_argument('--parse-chunks-to-string', action='store_true', help='Output results as one formatted string. Only applicable for "chunks"')
     search_parser.add_argument('--serving-config', default='default_config', help='Serving config ID for the data store')
+    search_parser.add_argument('--content_search_spec_type', default="chunks", help='"chunks" or "documents" depending on data store type')
     # Add arguments for num_previous_chunks, num_next_chunks, data_store_ids if needed
     # search_parser.add_argument('--num-previous-chunks', type=int, default=3)
     # search_parser.add_argument('--num-next-chunks', type=int, default=3)
@@ -529,6 +542,8 @@ def setup_discovery_engine_subparser(subparsers):
     search_by_id_parser.add_argument('--page-size', type=int, default=10, help='Max results per page')
     search_by_id_parser.add_argument('--parse-chunks-to-string', action='store_true', help='Output results as one formatted string')
     search_by_id_parser.add_argument('--serving-config', default='default_config', help='Serving config ID')
+    search_by_id_parser.add_argument('--content_search_spec_type', default="chunks", help='"chunks" or "documents" depending on data store type')
     # Add arguments for num_previous_chunks, num_next_chunks, data_store_ids if needed
     # search_by_id_parser.add_argument('--num-previous-chunks', type=int, default=3)
     # search_by_id_parser.add_argument('--num-next-chunks', type=int, default=3)

sunholo/discovery_engine/discovery_engine_client.py CHANGED Viewed

@@ -218,16 +218,16 @@ class DiscoveryEngineClient:
         Args:
             query (str): The search query.
-            collection_id (str): The ID of the collection to search.
             num_previous_chunks (int, optional): Number of previous chunks to return for context (default is 3).
             num_next_chunks (int, optional): Number of next chunks to return for context (default is 3).
             page_size (int, optional): The maximum number of results to return per page (default is 10).
             parse_chunks_to_string: If True will put chunks in one big string, False will return object
             serving_config: The resource name of the Search serving config
-            data_store_ids: If you want to search over many data stores, not just the one that was used to init the class. They should be of the format projects/{project}/locations/{location}/collections/{collection_id}/dataStores/{data_store_id}
+            data_store_ids: If you want to search over many data stores, not just the one that was used to init the class.
+                            They should be of the format projects/{project}/locations/{location}/collections/{collection_id}/dataStores/{data_store_id}
         Returns:
-            discoveryengine.SearchResponse: The search response object containing the search results.
+            discoveryengine.SearchResponse or str: The search response object or string of chunks.
         Example:
             ```python
@@ -237,52 +237,19 @@ class DiscoveryEngineClient:
                     print(f"Chunk: {chunk.snippet}, document name: {chunk.document_name}")
             ```
         """
-        serving_config_path = self.search_client.serving_config_path(
-            self.project_id,
-            self.location,
-            self.data_store_id,
-            serving_config
-        )
-        search_request = discoveryengine.SearchRequest(
-            serving_config=serving_config_path,
+        # Use search_with_filters with filter_str=None to perform a regular search
+        return self.search_with_filters(
             query=query,
-            page_size=page_size,
-            content_search_spec=discoveryengine.SearchRequest.ContentSearchSpec(
-                search_result_mode="CHUNKS",
-                chunk_spec=discoveryengine.SearchRequest.ContentSearchSpec.ChunkSpec(
-                    num_previous_chunks=num_previous_chunks,
-                    num_next_chunks=num_next_chunks,
-                ),
-            ),
+            filter_str=None,
+            num_previous_chunks=num_previous_chunks,
+            num_next_chunks=num_next_chunks,
+            page_size=page_size,
+            parse_chunks_to_string=parse_chunks_to_string,
+            serving_config=serving_config,
+            data_store_ids=data_store_ids,
+            content_search_spec_type="chunks"
         )
-        if data_store_ids:
-            search_request.data_store_specs = [
-                discoveryengine.SearchRequest.DataStoreSpec(
-                    data_store=self._search_data_store_path(data_store_id, serving_config=serving_config)
-                )
-                for data_store_id in data_store_ids
-            ]
-        try:
-            log.info(f"Discovery engine request: {search_request=}")
-            search_response = self.search_client.search(search_request)
-        except Exception as err:
-            log.warning(f"Error searching {search_request=} - no results found? {str(err)}")
-            search_response = []
-        if parse_chunks_to_string:
-            big_string = self.process_chunks(search_response)
-            log.info(f"Discovery engine chunks string sample: {big_string[:100]}")
-            return big_string
-        log.info("Discovery engine response object")
-        return search_response
     async def async_get_chunks(
         self,
         query: str,
@@ -293,73 +260,32 @@ class DiscoveryEngineClient:
         serving_config: str = "default_serving_config",
         data_store_ids: Optional[List[str]] = None,
     ):
-        """Retrieves chunks or documents based on a query.
+        """Asynchronously retrieves chunks or documents based on a query.
         Args:
             query (str): The search query.
-            collection_id (str): The ID of the collection to search.
             num_previous_chunks (int, optional): Number of previous chunks to return for context (default is 3).
             num_next_chunks (int, optional): Number of next chunks to return for context (default is 3).
             page_size (int, optional): The maximum number of results to return per page (default is 10).
             parse_chunks_to_string: If True will put chunks in one big string, False will return object
             serving_config: The resource name of the Search serving config
-            data_store_ids: If you want to search over many data stores, not just the one that was used to init the class. They should be of the format projects/{project}/locations/{location}/collections/{collection_id}/dataStores/{data_store_id}
+            data_store_ids: If you want to search over many data stores, not just the one that was used to init the class.
+                            They should be of the format projects/{project}/locations/{location}/collections/{collection_id}/dataStores/{data_store_id}
         Returns:
-            discoveryengine.SearchResponse: The search response object containing the search results.
-        Example:
-            ```python
-            search_response = client.get_chunks('your query', 'your_collection_id')
-            for result in search_response.results:
-                for chunk in result.document.chunks:
-                    print(f"Chunk: {chunk.snippet}, document name: {chunk.document_name}")
-            ```
+            discoveryengine.SearchResponse or str: The search response object or string of chunks.
         """
-        serving_config_path = self.async_search_client.serving_config_path(
-            self.project_id,
-            self.location,
-            self.data_store_id,
-            serving_config
-        )
-        search_request = discoveryengine.SearchRequest(
-            serving_config=serving_config_path,
+        # Use async_search_with_filters with filter_str=None to perform a regular search
+        return await self.async_search_with_filters(
             query=query,
-            page_size=page_size,
-            content_search_spec=discoveryengine.SearchRequest.ContentSearchSpec(
-                search_result_mode="CHUNKS",
-                chunk_spec=discoveryengine.SearchRequest.ContentSearchSpec.ChunkSpec(
-                    num_previous_chunks=num_previous_chunks,
-                    num_next_chunks=num_next_chunks,
-                ),
-            ),
+            filter_str=None,
+            num_previous_chunks=num_previous_chunks,
+            num_next_chunks=num_next_chunks,
+            page_size=page_size,
+            parse_chunks_to_string=parse_chunks_to_string,
+            serving_config=serving_config,
+            data_store_ids=data_store_ids
         )
-        if data_store_ids:
-            search_request.data_store_specs = [
-                discoveryengine.SearchRequest.DataStoreSpec(data_store=data_store_id)
-                for data_store_id in data_store_ids
-            ]
-        try:
-            log.info(f"Discovery engine request: {search_request=}")
-            search_response = self.async_search_client.search(search_request)
-        except Exception as err:
-            log.warning(f"Error searching {search_request=} - no results found? {str(err)}")
-            search_response = []
-        if parse_chunks_to_string:
-            big_string = await self.async_process_chunks(search_response)
-            log.info(f"Discovery engine chunks string sample: {big_string[:100]}")
-            return big_string
-        log.info("Discovery engine response object")
-        return search_response
     def chunk_format(self, chunk):
@@ -432,6 +358,79 @@ class DiscoveryEngineClient:
         result_string = "\n".join(all_chunks)
         return result_string
+    def get_documents(
+        self,
+        query: str,
+        page_size: int = 10,
+        parse_documents_to_string: bool = True,
+        serving_config: str = "default_serving_config",
+        data_store_ids: Optional[List[str]] = None,
+    ):
+        """Retrieves entire documents based on a query.
+        Args:
+            query (str): The search query.
+            page_size (int, optional): The maximum number of results to return per page (default is 10).
+            parse_documents_to_string: If True will put documents in one big string, False will return object
+            serving_config: The resource name of the Search serving config
+            data_store_ids: If you want to search over many data stores, not just the one that was used to init the class.
+                            They should be of the format projects/{project}/locations/{location}/collections/{collection_id}/dataStores/{data_store_id}
+        Returns:
+            discoveryengine.SearchResponse or str: The search response object or string of documents.
+        Example:
+            ```python
+            search_response = client.get_documents('your query')
+            for result in search_response.results:
+                doc = result.document
+                print(f"Document: {doc.name}, Title: {doc.derived_struct_data.get('title')}")
+            ```
+        """
+        # Use search_with_filters with content_search_spec_type="documents" to get documents instead of chunks
+        return self.search_with_filters(
+            query=query,
+            filter_str=None,
+            page_size=page_size,
+            parse_chunks_to_string=parse_documents_to_string,
+            serving_config=serving_config,
+            data_store_ids=data_store_ids,
+            content_search_spec_type="documents"
+        )
+    async def async_get_documents(
+        self,
+        query: str,
+        page_size: int = 10,
+        parse_documents_to_string: bool = True,
+        serving_config: str = "default_serving_config",
+        data_store_ids: Optional[List[str]] = None,
+    ):
+        """Asynchronously retrieves entire documents based on a query.
+        Args:
+            query (str): The search query.
+            page_size (int, optional): The maximum number of results to return per page (default is 10).
+            parse_documents_to_string: If True will put documents in one big string, False will return object
+            serving_config: The resource name of the Search serving config
+            data_store_ids: If you want to search over many data stores, not just the one that was used to init the class.
+                            They should be of the format projects/{project}/locations/{location}/collections/{collection_id}/dataStores/{data_store_id}
+        Returns:
+            discoveryengine.SearchResponse or str: The search response object or string of documents.
+        """
+        # Note: You'll need to update async_search_with_filters to handle content_search_spec_type
+        # as it doesn't currently have that parameter
+        return await self.async_search_with_filters(
+            query=query,
+            filter_str=None,
+            page_size=page_size,
+            parse_chunks_to_string=parse_documents_to_string,
+            serving_config=serving_config,
+            data_store_ids=data_store_ids,
+            content_search_spec_type="documents"
+        )
     def create_engine(self,
         engine_id: str,
@@ -693,7 +692,8 @@ class DiscoveryEngineClient:
                         num_previous_chunks=3, num_next_chunks=3,
                         page_size=10, parse_chunks_to_string=True,
                         serving_config="default_serving_config",
-                        data_store_ids: Optional[List[str]] = None):
+                        data_store_ids: Optional[List[str]] = None,
+                        content_search_spec_type="chunks"):
         """
         Searches with a generic filter string.
@@ -713,17 +713,26 @@ class DiscoveryEngineClient:
             serving_config
         )
+        if content_search_spec_type == "chunks":
+            content_search_spec=discoveryengine.SearchRequest.ContentSearchSpec(
+                            search_result_mode="CHUNKS",
+                            chunk_spec=discoveryengine.SearchRequest.ContentSearchSpec.ChunkSpec(
+                                num_previous_chunks=num_previous_chunks,
+                                num_next_chunks=num_next_chunks,
+                            ),
+                        )
+        elif content_search_spec_type == "documents":
+            content_search_spec=discoveryengine.SearchRequest.ContentSearchSpec(
+                            search_result_mode="DOCUMENTS"
+                            )
+        else:
+            raise ValueError(f"Unknown content_search_spec_type={content_search_spec_type}")
         search_request = discoveryengine.SearchRequest(
             serving_config=serving_config_path,
             query=query,
             page_size=page_size,
-            content_search_spec=discoveryengine.SearchRequest.ContentSearchSpec(
-                search_result_mode="CHUNKS",
-                chunk_spec=discoveryengine.SearchRequest.ContentSearchSpec.ChunkSpec(
-                    num_previous_chunks=num_previous_chunks,
-                    num_next_chunks=num_next_chunks,
-                ),
-            ),
+            content_search_spec=content_search_spec,
             filter=filter_str # name:'ANY("king kong")'
         )
@@ -756,7 +765,8 @@ class DiscoveryEngineClient:
                             num_previous_chunks=3, num_next_chunks=3,
                             page_size=10, parse_chunks_to_string=True,
                             serving_config="default_serving_config",
-                            data_store_ids: Optional[List[str]] = None):
+                            data_store_ids: Optional[List[str]] = None,
+                            content_search_spec_type="chunks"):
         """
         Searches with a generic filter string asynchronously.
@@ -776,17 +786,26 @@ class DiscoveryEngineClient:
             serving_config
         )
+        if content_search_spec_type == "chunks":
+            content_search_spec=discoveryengine.SearchRequest.ContentSearchSpec(
+                            search_result_mode="CHUNKS",
+                            chunk_spec=discoveryengine.SearchRequest.ContentSearchSpec.ChunkSpec(
+                                num_previous_chunks=num_previous_chunks,
+                                num_next_chunks=num_next_chunks,
+                            ),
+                        )
+        elif content_search_spec_type == "documents":
+            content_search_spec=discoveryengine.SearchRequest.ContentSearchSpec(
+                            search_result_mode="DOCUMENTS"
+                            )
+        else:
+            raise ValueError(f"Unknown content_search_spec_type={content_search_spec_type}")
         search_request = discoveryengine.SearchRequest(
             serving_config=serving_config_path,
             query=query,
             page_size=page_size,
-            content_search_spec=discoveryengine.SearchRequest.ContentSearchSpec(
-                search_result_mode="CHUNKS",
-                chunk_spec=discoveryengine.SearchRequest.ContentSearchSpec.ChunkSpec(
-                    num_previous_chunks=num_previous_chunks,
-                    num_next_chunks=num_next_chunks,
-                ),
-            ),
+            content_search_spec=content_search_spec,
             filter=filter_str # name:'ANY("king kong")'
         )

{sunholo-0.134.2.dist-info → sunholo-0.134.3.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sunholo
-Version: 0.134.2
+Version: 0.134.3
 Summary: AI DevOps - a package to help deploy GenAI to the Cloud.
 Author-email: Holosun ApS <multivac@sunholo.com>
 License: Apache License, Version 2.0

{sunholo-0.134.2.dist-info → sunholo-0.134.3.dist-info}/RECORD RENAMED Viewed

@@ -73,9 +73,9 @@ sunholo/database/sql/sb/return_sources.sql,sha256=89KAnxfK8n_qGK9jy1OQT8f9n4uYUt
 sunholo/database/sql/sb/setup.sql,sha256=CvoFvZQev2uWjmFa3aj3m3iuPFzAAJZ0S7Qi3L3-zZI,89
 sunholo/discovery_engine/__init__.py,sha256=hLgqRDJ22Aov9o2QjAEfsVgnL3kMdM-g5p8RJ9OyKdQ,130
 sunholo/discovery_engine/chunker_handler.py,sha256=wkvXl4rFtYfN6AZUKdW9_QD49Whf77BukDbO82UwlAg,7480
-sunholo/discovery_engine/cli.py,sha256=so8blTu708TjbyifWdZG_eju0p9L98wLq5Lpl9T-yGo,33159
+sunholo/discovery_engine/cli.py,sha256=bUhCPoKrkMtdeTBHLyCZf9syVwHn5kE0yXpqDBIzmTc,34120
 sunholo/discovery_engine/create_new.py,sha256=WUi4_xh_dFaGX3xA9jkNKZhaR6LCELjMPeRb0hyj4FU,1226
-sunholo/discovery_engine/discovery_engine_client.py,sha256=lB6D05ZOXm9Avl6hM6vJZvPZD_TzNroyBl-E5cJYWAk,52661
+sunholo/discovery_engine/discovery_engine_client.py,sha256=D_OiMiMDScwC426xzgbMpAPNV9Q8xaz4y_waDeRPhVQ,54496
 sunholo/discovery_engine/get_ai_search_chunks.py,sha256=I6Dt1CznqEvE7XIZ2PkLqopmjpO96iVEWJJqL5cJjOU,5554
 sunholo/embedder/__init__.py,sha256=sI4N_CqgEVcrMDxXgxKp1FsfsB4FpjoXgPGkl4N_u4I,44
 sunholo/embedder/embed_chunk.py,sha256=did2pKkWM2o0KkRcb0H9l2x_WjCq6OyuHDxGbITFKPM,6530
@@ -168,9 +168,9 @@ sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
 sunholo/vertex/memory_tools.py,sha256=tBZxqVZ4InTmdBvLlOYwoSEWu4-kGquc-gxDwZCC4FA,7667
 sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
 sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
-sunholo-0.134.2.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
-sunholo-0.134.2.dist-info/METADATA,sha256=Ujn_UARqsdpkBJdyHwDnHwS38VO5l58T9vNMD6R55x8,10067
-sunholo-0.134.2.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
-sunholo-0.134.2.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
-sunholo-0.134.2.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
-sunholo-0.134.2.dist-info/RECORD,,
+sunholo-0.134.3.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
+sunholo-0.134.3.dist-info/METADATA,sha256=XicSY1z4sd8PfmmNYnZyKvKYEDOMauxj-uf7WCOs328,10067
+sunholo-0.134.3.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
+sunholo-0.134.3.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
+sunholo-0.134.3.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
+sunholo-0.134.3.dist-info/RECORD,,

{sunholo-0.134.2.dist-info → sunholo-0.134.3.dist-info}/WHEEL RENAMED Viewed

File without changes

{sunholo-0.134.2.dist-info → sunholo-0.134.3.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{sunholo-0.134.2.dist-info → sunholo-0.134.3.dist-info}/licenses/LICENSE.txt RENAMED Viewed

File without changes

{sunholo-0.134.2.dist-info → sunholo-0.134.3.dist-info}/top_level.txt RENAMED Viewed

File without changes

sunholo 0.134.2__py3-none-any.whl → 0.134.3__py3-none-any.whl

sunholo 0.134.2__py3-none-any.whl → 0.134.3__py3-none-any.whl