PyPI - sunholo - Versions diffs - 0.96.7__py3-none-any.whl → 0.96.9__py3-none-any.whl - Mend

sunholo 0.96.7__py3-none-any.whl → 0.96.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

sunholo/discovery_engine/__init__.py CHANGED Viewed

@@ -1,2 +1,2 @@
 from .discovery_engine_client import DiscoveryEngineClient
-from .get_ai_search_chunks import get_all_chunks
+from .get_ai_search_chunks import get_all_chunks, async_get_all_chunks

sunholo/discovery_engine/discovery_engine_client.py CHANGED Viewed

@@ -69,8 +69,10 @@ class DiscoveryEngineClient:
         self.store_client  = discoveryengine.DataStoreServiceClient(client_options=client_options)
         self.doc_client    = discoveryengine.DocumentServiceClient(client_options=client_options)
         self.search_client = discoveryengine.SearchServiceClient(client_options=client_options)
+        self.async_search_client = discoveryengine.SearchServiceAsyncClient(client_options=client_options)
         self.engine_client = discoveryengine.EngineServiceClient(client_options=client_options)
     @classmethod
     def my_retry(cls):
         return Retry(
@@ -221,6 +223,71 @@ class DiscoveryEngineClient:
         log.info(f"Discovery engine request: {search_request=}")
         search_response = self.search_client.search(search_request)
+        if parse_chunks_to_string:
+            big_string = self.process_chunks(search_response)
+            log.info(f"Discovery engine chunks string sample: {big_string[:100]}")
+            return big_string
+        log.info("Discovery engine response object")
+        return search_response
+    async def async_get_chunks(
+        self,
+        query: str,
+        num_previous_chunks: int = 3,
+        num_next_chunks: int = 3,
+        page_size: int = 10,
+        parse_chunks_to_string: bool = True,
+        serving_config: str = "default_serving_config",
+    ):
+        """Retrieves chunks or documents based on a query.
+        Args:
+            query (str): The search query.
+            collection_id (str): The ID of the collection to search.
+            num_previous_chunks (int, optional): Number of previous chunks to return for context (default is 3).
+            num_next_chunks (int, optional): Number of next chunks to return for context (default is 3).
+            page_size (int, optional): The maximum number of results to return per page (default is 10).
+            parse_chunks_to_string: If True will put chunks in one big string, False will return object
+            serving_config: The resource name of the Search serving config
+        Returns:
+            discoveryengine.SearchResponse: The search response object containing the search results.
+        Example:
+            ```python
+            search_response = client.get_chunks('your query', 'your_collection_id')
+            for result in search_response.results:
+                for chunk in result.document.chunks:
+                    print(f"Chunk: {chunk.snippet}, document name: {chunk.document_name}")
+            ```
+        """
+        serving_config_path = self.async_search_client.serving_config_path(
+            self.project_id,
+            self.location,
+            self.data_store_id,
+            serving_config
+        )
+        search_request = discoveryengine.SearchRequest(
+            serving_config=serving_config_path,
+            query=query,
+            page_size=page_size,
+            content_search_spec=discoveryengine.SearchRequest.ContentSearchSpec(
+                search_result_mode="CHUNKS",
+                chunk_spec=discoveryengine.SearchRequest.ContentSearchSpec.ChunkSpec(
+                    num_previous_chunks=num_previous_chunks,
+                    num_next_chunks=num_next_chunks,
+                ),
+            ),
+        )
+        log.info(f"Discovery engine async request: {search_request=}")
+        search_response = self.async_search_client.search(search_request)
         if parse_chunks_to_string:
@@ -232,7 +299,7 @@ class DiscoveryEngineClient:
         log.info("Discovery engine response object")
         return search_response
-    def chunk_format(self, chunk: Chunk):
+    def chunk_format(self, chunk):
         return (
                     f"# {chunk.id}\n"
                     f"{chunk.content}\n"
@@ -241,7 +308,7 @@ class DiscoveryEngineClient:
                     f"Document Title: {chunk.document_metadata.title}\n"
                 )
-    def process_chunks(self, response: SearchResponse):
+    def process_chunks(self, response):
         all_chunks = []
         # Check if the response contains results
@@ -269,6 +336,35 @@ class DiscoveryEngineClient:
         result_string = "\n".join(all_chunks)
         return result_string
+    async def async_process_chunks(self, response):
+        all_chunks = []
+        # Check if the response contains results
+        if not hasattr(response, 'results') or not response.results:
+            raise ValueError(f'No results found in response: {response=}')
+        # Iterate through each result in the response
+        async for result in response.results:
+            chunk = result.chunk
+            chunk_metadata = chunk.ChunkMetadata
+            if hasattr(chunk_metadata, 'previous_chunks'):
+                # Process previous chunks
+                for prev_chunk in chunk_metadata.previous_chunks:
+                    all_chunks.append(self.chunk_format(prev_chunk))
+            all_chunks.append(self.chunk_format(chunk))
+            # Process next chunks
+            if hasattr(chunk_metadata, 'next_chunks'):
+                for next_chunk in chunk_metadata.next_chunks:
+                    all_chunks.append(self.chunk_format(next_chunk))
+        # Combine all chunks into one long string
+        result_string = "\n".join(all_chunks)
+        return result_string
     def create_engine(self,
         engine_id: str,

sunholo/discovery_engine/get_ai_search_chunks.py CHANGED Viewed

@@ -29,7 +29,9 @@ def get_all_chunks(question:str, config:ConfigManager):
                     new_vector_name = value.get('vector_name')
                     if not new_vector_name:
                         log.warning("read_only specified but no new vector_name to read from")
-                    vector_name = new_vector_name
+                        continue
+                    else:
+                        vector_name = new_vector_name
                 num_chunks = value.get('num_chunks') or 3
@@ -51,3 +53,49 @@ def get_chunks(question, vector_name, num_chunks):
+async def async_get_all_chunks(question:str, config:ConfigManager):
+    """
+    Look through a config memory key and find all Vertex AI Search retrievers, call them and return a joined string of chunks
+        args: question - question to search similarity for
+        config: A ConfigManager object
+        returns: a big string of chunks
+    """
+    memories = load_memories(config=config)
+    chunks = []
+    if not memories:
+        return None
+    vector_name = config.vector_name
+    for memory in memories:
+        for key, value in memory.items():  # Now iterate over the dictionary
+            log.info(f"Found memory {key}")
+            vectorstore = value.get('vectorstore')
+            if vectorstore == "discovery_engine" or vectorstore == "vertex_ai_search":
+                if value.get('read_only'):
+                    new_vector_name = value.get('vector_name')
+                    if not new_vector_name:
+                        log.warning("read_only specified but no new vector_name to read from")
+                        continue
+                    else:
+                        vector_name = new_vector_name
+                num_chunks = value.get('num_chunks') or 3
+                chunk = await async_get_chunks(question, vector_name, num_chunks)
+                if chunk:
+                    chunks.append(chunk)
+    if chunks:
+        return "\n".join(chunks)
+    log.warning(f"No chunks found for {vector_name}")
+    return None
+async def async_get_chunks(question, vector_name, num_chunks):
+    de = DiscoveryEngineClient(vector_name, project_id=get_gcp_project(include_config=True))
+    try:
+        return await de.async_get_chunks(question, num_previous_chunks=num_chunks, num_next_chunks=num_chunks)
+    except Exception as err:
+        log.error(f"No discovery engine chunks found: {str(err)}")

{sunholo-0.96.7.dist-info → sunholo-0.96.9.dist-info}/METADATA RENAMED Viewed

@@ -1,9 +1,9 @@
 Metadata-Version: 2.1
 Name: sunholo
-Version: 0.96.7
+Version: 0.96.9
 Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
 Home-page: https://github.com/sunholo-data/sunholo-py
-Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.96.7.tar.gz
+Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.96.9.tar.gz
 Author: Holosun ApS
 Author-email: multivac@sunholo.com
 License: Apache License, Version 2.0
@@ -148,6 +148,19 @@ This is the Sunholo Python project, a comprehensive toolkit for working with lan
 Please refer to the website for full documentation at https://dev.sunholo.com/
+## Listen to the audio file:
+A [NotebookLM](https://notebooklm.google/) generated podcast of the codebase that may help give you an overview of what the library is capable of:
+<audio controls>
+  <source src="https://drive.google.com/uc?export=download&id=1GvwRmiYDjPjN2hXQ8plhnVDByu6TmgCQ" type="audio/wav">
+  Your browser does not support the audio element.
+</audio>
+[Alternatively, Download the audio file from Google Drive](https://drive.google.com/uc?export=download&id=1GvwRmiYDjPjN2hXQ8plhnVDByu6TmgCQ)
 ## Tests via pytest
 If loading from GitHub, run tests:

{sunholo-0.96.7.dist-info → sunholo-0.96.9.dist-info}/RECORD RENAMED Viewed

@@ -70,11 +70,11 @@ sunholo/database/sql/sb/create_table.sql,sha256=SbcOrf5tUiVKGUohu1lau7IsbDRbTFbr
 sunholo/database/sql/sb/delete_source_row.sql,sha256=r6fEuUKdbiLHCDGKSbKINDCpJjsmfHZNNOo1ptwLLSo,75
 sunholo/database/sql/sb/return_sources.sql,sha256=89KAnxfK8n_qGK9jy1OQT8f9n4uYUtYL5cCxbC2mj_c,255
 sunholo/database/sql/sb/setup.sql,sha256=CvoFvZQev2uWjmFa3aj3m3iuPFzAAJZ0S7Qi3L3-zZI,89
-sunholo/discovery_engine/__init__.py,sha256=P00bB8aVVWefOZbCQvzHsVMuP_sd-_d_4o5xCuCpN3g,108
+sunholo/discovery_engine/__init__.py,sha256=hLgqRDJ22Aov9o2QjAEfsVgnL3kMdM-g5p8RJ9OyKdQ,130
 sunholo/discovery_engine/chunker_handler.py,sha256=Fv4BLOBi_7ap3AiAy4TlTN48CLZSMurJ3TkvC75Euro,5123
 sunholo/discovery_engine/create_new.py,sha256=NzhSh6nG6nQ5J9gZh8IDph4JiEVT_DC5GGvP0GuwTWs,943
-sunholo/discovery_engine/discovery_engine_client.py,sha256=oORB2SVVqrYrz7E3srPrknyuR6Dl3SJJwaVrbVXJER4,17726
-sunholo/discovery_engine/get_ai_search_chunks.py,sha256=VPzdYoBP_E6Bko0KpX656QiIfJdwmje4sBnPtZs4JQ4,1963
+sunholo/discovery_engine/discovery_engine_client.py,sha256=FjcKCIeLz40Xn8DqwHZuHCYp2-oOFHw-doy1v-ULnEk,21536
+sunholo/discovery_engine/get_ai_search_chunks.py,sha256=6SO6v_4AcrUat0bP7wqC8xg9aY916Fnw_aZsogrLx-g,3877
 sunholo/embedder/__init__.py,sha256=sI4N_CqgEVcrMDxXgxKp1FsfsB4FpjoXgPGkl4N_u4I,44
 sunholo/embedder/embed_chunk.py,sha256=MCbTePWjUbIRVDFFhHJ94BvOZvIom62-mTr0PmfQyt0,6951
 sunholo/excel/__init__.py,sha256=AqTMN9K4qJYi4maEgoORc5oxDVGO_eqmwzDaVP37JgY,56
@@ -144,9 +144,9 @@ sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
 sunholo/vertex/memory_tools.py,sha256=tBZxqVZ4InTmdBvLlOYwoSEWu4-kGquc-gxDwZCC4FA,7667
 sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
 sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
-sunholo-0.96.7.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
-sunholo-0.96.7.dist-info/METADATA,sha256=z7WL3L3iW84irIoEJpODfgdMM7R1WZmCX9S9zUh8KqQ,7889
-sunholo-0.96.7.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
-sunholo-0.96.7.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
-sunholo-0.96.7.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
-sunholo-0.96.7.dist-info/RECORD,,
+sunholo-0.96.9.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
+sunholo-0.96.9.dist-info/METADATA,sha256=7-B4Hs2tNbUkmGbDYWPYfJJCWDpe6WtoLMxbok8zH4A,8404
+sunholo-0.96.9.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+sunholo-0.96.9.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
+sunholo-0.96.9.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
+sunholo-0.96.9.dist-info/RECORD,,

{sunholo-0.96.7.dist-info → sunholo-0.96.9.dist-info}/LICENSE.txt RENAMED Viewed

File without changes

{sunholo-0.96.7.dist-info → sunholo-0.96.9.dist-info}/WHEEL RENAMED Viewed

File without changes

{sunholo-0.96.7.dist-info → sunholo-0.96.9.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{sunholo-0.96.7.dist-info → sunholo-0.96.9.dist-info}/top_level.txt RENAMED Viewed

File without changes

sunholo 0.96.7__py3-none-any.whl → 0.96.9__py3-none-any.whl

sunholo 0.96.7__py3-none-any.whl → 0.96.9__py3-none-any.whl