sunholo 0.78.4__py3-none-any.whl → 0.79.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -175,6 +175,10 @@ Be careful not to add any speculation or any details that are not covered in the
      bucket_name = os.getenv("DOC_BUCKET")
      if not bucket_name:
          raise ValueError("No DOC_BUCKET configured for summary")
+
+     if bucket_name.startswith("gs://"):
+         bucket_name = bucket_name[len("gs://"):]
+
      with tempfile.NamedTemporaryFile(mode='w+', delete=False) as temp_file:
          temp_file.write(summary)
          temp_file.flush()
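
The new guard normalizes a `DOC_BUCKET` value such as `gs://my-bucket` down to the bare bucket name before uploading the summary. A standalone sketch of the same normalization (the helper name and example values are illustrative, not part of the package):

```python
def strip_gs_prefix(bucket_name: str) -> str:
    """Drop an optional gs:// scheme so the storage client receives a bare bucket name."""
    if bucket_name.startswith("gs://"):
        bucket_name = bucket_name[len("gs://"):]
    return bucket_name

assert strip_gs_prefix("gs://my-bucket") == "my-bucket"
assert strip_gs_prefix("my-bucket") == "my-bucket"
```
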
sunholo/components/llm.py CHANGED
@@ -16,15 +16,22 @@ from ..utils import load_config_key, ConfigManager
 
  import os
 
- def pick_llm(vector_name):
+ def pick_llm(vector_name:str=None, config:ConfigManager=None):
+
+     if config is None:
+         if vector_name is None:
+             raise ValueError("config and vector_name was None")
+         config = ConfigManager(vector_name)
+
      log.debug('Picking llm')
 
-     llm_str = load_config_key("llm", vector_name, kind="vacConfig")
+     llm_str = config.vacConfig("llm")
 
      if llm_str == 'openai':
-         llm_chat = get_llm_chat(vector_name)
-         llm = get_llm_chat(vector_name, model="gpt-3.5-turbo-16k") # TODO: fix it needs llm_chat and not llm
-         embeddings = get_embeddings(vector_name)
+         llm_chat = get_llm_chat(config=config)
+         llm = get_llm_chat(model="gpt-3.5-turbo-16k", config=config) # TODO: fix it needs llm_chat and not llm
+         embeddings = get_embeddings(config=config)
+
          log.debug("Chose OpenAI")
      elif llm_str == 'vertex':
          llm = get_llm_chat(vector_name) # TODO: fix it needs llm_chat and not llm
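
This hunk introduces the pattern repeated throughout 0.79.0: each component helper now accepts either a `vector_name` (from which a `ConfigManager` is built internally) or an already-constructed `config`, and raises if both are `None`. A sketch of the two call styles, assuming a valid vacConfig exists for the illustrative vector name `"my_vac"`:

```python
from sunholo.utils import ConfigManager
from sunholo.components.llm import pick_llm, get_llm_chat

# Backwards-compatible style: pass the vector name and let each call resolve its own config.
picked = pick_llm("my_vac")

# New style: build the ConfigManager once and reuse it across component calls.
config = ConfigManager("my_vac")
picked = pick_llm(config=config)
chat = get_llm_chat(model="gpt-3.5-turbo-16k", config=config)
```
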
@@ -62,7 +69,9 @@ def llm_str_to_llm(llm_str, model=None, vector_name=None, config=None):
      if llm_str is None:
          raise NotImplementedError("llm_str was None")
 
-     if vector_name:
+     if config is None:
+         if vector_name is None:
+             raise ValueError("vector_name and config was None")
          config = ConfigManager(vector_name)
 
      if llm_str == 'openai':
@@ -125,10 +134,16 @@ def get_llm(vector_name=None, model=None, config=None):
      log.debug(f"Chose LLM: {llm_str}")
      return llm_str_to_llm(llm_str, model=model, config=config)
 
- def get_llm_chat(vector_name, model=None):
-     llm_str = load_config_key("llm", vector_name, kind="vacConfig")
+ def get_llm_chat(vector_name:str=None, model=None, config:ConfigManager=None):
+
+     if config is None:
+         if vector_name is None:
+             raise ValueError("config and vector_name was None")
+         config = ConfigManager(vector_name)
+
+     llm_str = config.vacConfig("llm")
      if not model:
-         model = load_config_key("model", vector_name, kind="vacConfig")
+         model = config.vacConfig("model")
 
      log.debug(f"Chose LLM: {llm_str}")
      # Configure LLMs based on llm_str
@@ -167,7 +182,7 @@ def get_llm_chat(vector_name, model=None):
          return ChatAnthropic(model_name = model, temperature=0)
      elif llm_str == 'azure':
          from langchain_openai import AzureChatOpenAI
-         azure_config = load_config_key("azure", vector_name, kind="vacConfig")
+         azure_config = config.vacConfig("azure")
          if not azure_config:
              raise ValueError("Need to configure azure.config if llm='azure'")
 
@@ -209,22 +224,37 @@ def get_llm_chat(vector_name, model=None):
      if llm_str is None:
          raise NotImplementedError(f'No llm implemented for {llm_str}')
 
- def get_embeddings(vector_name):
+ def get_embeddings(vector_name=None, config:ConfigManager=None):
+
+     if not config:
+         if not vector_name:
+             raise ValueError(f"config and vector_name was None: {vector_name}")
+         config = ConfigManager(vector_name)
+
 
      llm_str = None
-     embed_dict = load_config_key("embedder", vector_name, kind="vacConfig")
+     embed_dict = config.vacConfig("embedder")
 
      if embed_dict:
          llm_str = embed_dict.get('llm')
 
      if llm_str is None:
-         llm_str = load_config_key("llm", vector_name, kind="vacConfig")
+         llm_str = config.vacConfig("llm")
+
+     if llm_str is None:
+         raise ValueError(f"llm_str was None: {llm_str}")
 
-     return pick_embedding(llm_str, vector_name=vector_name)
+     return pick_embedding(llm_str, config=config)
 
 
  #TODO: specify model
- def pick_embedding(llm_str: str, vector_name: str=None):
+ def pick_embedding(llm_str: str, vector_name: str=None, config: ConfigManager=None):
+
+     if not config:
+         if not vector_name:
+             raise ValueError(f"config and vector_name was None {vector_name}")
+         config = ConfigManager(vector_name)
+
      # get embedding directly from llm_str
      # Configure embeddings based on llm_str
      if llm_str == 'openai':
@@ -244,7 +274,7 @@ def pick_embedding(llm_str: str, vector_name: str=None):
      elif llm_str == 'azure':
          from langchain_openai import AzureOpenAIEmbeddings
 
-         azure_config = load_config_key("azure", vector_name, kind="vacConfig")
+         azure_config = config.vacConfig("azure")
          if not azure_config:
              raise ValueError("Need to configure azure.config if llm='azure'")
 
sunholo/components/retriever.py CHANGED
@@ -26,8 +26,14 @@ from langchain.retrievers import ContextualCompressionRetriever
 
 
 
- def load_memories(vector_name):
-     memories = ConfigManager(vector_name).vacConfig("memory")
+ def load_memories(vector_name:str=None, config:ConfigManager=None):
+     if config is None:
+         if vector_name is None:
+             raise ValueError("vector_name and config were none")
+         config = ConfigManager(vector_name)
+
+     memories = config.vacConfig("memory")
+
      log.info(f"Found memory settings for {vector_name}: {memories}")
      if not memories or len(memories) == 0:
          log.info(f"No memory settings found for {vector_name}")
sunholo/database/alloydb.py CHANGED
@@ -232,7 +232,7 @@ async def load_alloydb_sql_async(sql, vector_name)
      return documents
 
  def and_or_ilike(sources, search_type="OR", operator="ILIKE"):
-     unique_sources = set(sources)
+     unique_sources = set(sources.split())
      # Choose the delimiter based on the search_type argument
      delimiter = ' AND ' if search_type.upper() == "AND" else ' OR '
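
Splitting `sources` before building the set fixes the case where a plain string was passed: `set(sources)` would have produced a set of individual characters, whereas `set(sources.split())` yields one entry per whitespace-separated source. A sketch of the resulting behaviour (condition order may vary because a set is used):

```python
from sunholo.database.alloydb import and_or_ilike

and_or_ilike("report.pdf notes.txt", search_type="AND")
# -> "TRIM(source) ILIKE '%report.pdf%' AND TRIM(source) ILIKE '%notes.txt%'"
#    (the two conditions may appear in either order)

and_or_ilike("")
# -> "" (and, as the next hunk shows, an empty string rather than the old empty list)
```
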
 
@@ -240,14 +240,14 @@ def and_or_ilike(sources, search_type="OR", operator="ILIKE"):
      conditions = delimiter.join(f"TRIM(source) {operator} '%{source}%'" for source in unique_sources)
      if not conditions:
          log.warning("Alloydb doc query found no like_patterns")
-         return []
+         return ""
 
      return conditions
 
  def _get_sources_from_docstore(sources, vector_name, search_type="OR"):
      if not sources:
          log.warning("No sources found for alloydb fetch")
-         return []
+         return ""
 
      table_name = f"{vector_name}_docstore"
 
@@ -263,10 +263,37 @@ def _get_sources_from_docstore(sources, vector_name, search_type="OR"):
 
      return query
 
+ def _list_sources_from_docstore(sources, vector_name, search_type="OR"):
+     table_name = f"{vector_name}_docstore"
+
+
+     if sources:
+         conditions = and_or_ilike(sources, search_type=search_type)
+         query = f"""
+             SELECT DISTINCT langchain_metadata->>'objectId' AS objectId
+             FROM {table_name}
+             WHERE {conditions}
+             ORDER BY langchain_metadata->>'objectId' ASC
+             LIMIT 500;
+         """
+     else:
+         query = f"""
+             SELECT DISTINCT langchain_metadata->>'objectId' AS objectId
+             FROM {table_name}
+             ORDER BY langchain_metadata->>'objectId' ASC
+             LIMIT 500;
+         """
+
+     return query
 
- async def get_sources_from_docstore_async(sources, vector_name, search_type="OR"):
+
+ async def get_sources_from_docstore_async(sources, vector_name, search_type="OR", just_source_name=False):
 
-     query = _get_sources_from_docstore(sources, vector_name=vector_name, search_type=search_type)
+     if just_source_name:
+         query = _list_sources_from_docstore(sources, vector_name=vector_name, search_type=search_type)
+     else:
+         query = _get_sources_from_docstore(sources, vector_name=vector_name, search_type=search_type)
+
      if not query:
          return []
 
@@ -274,9 +301,13 @@ async def get_sources_from_docstore_async(sources, vector_name, search_type="OR"
 
      return documents
 
- def get_sources_from_docstore(sources, vector_name, search_type="OR"):
+ def get_sources_from_docstore(sources, vector_name, search_type="OR", just_source_name=False):
 
-     query = _get_sources_from_docstore(sources, vector_name=vector_name, search_type=search_type)
+     if just_source_name:
+         query = _list_sources_from_docstore(sources, vector_name=vector_name, search_type=search_type)
+     else:
+         query = _get_sources_from_docstore(sources, vector_name=vector_name, search_type=search_type)
+
      if not query:
          return []
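
Both the async and sync fetchers now accept `just_source_name`; when set, the query built by `_list_sources_from_docstore` returns at most 500 distinct `objectId` values from the docstore's `langchain_metadata` instead of full rows. A usage sketch (the vector name and source filter are illustrative):

```python
from sunholo.database.alloydb import get_sources_from_docstore

# Full documents, as before:
docs = get_sources_from_docstore("report.pdf", vector_name="my_vac")

# Only the distinct objectId listing (capped at 500 rows):
names = get_sources_from_docstore("report.pdf", vector_name="my_vac", just_source_name=True)

# With no source filter, the listing branch still builds an unfiltered query
# (per _list_sources_from_docstore above):
all_names = get_sources_from_docstore("", vector_name="my_vac", just_source_name=True)
```
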
 
@@ -303,3 +334,5 @@ def delete_sources_from_alloydb(sources, vector_name):
      DELETE FROM {vector_name}_vectorstore_{vector_length}
      WHERE {conditions}
      """
+
+     return query
sunholo/embedder/embed_chunk.py CHANGED
@@ -22,6 +22,7 @@ from langchain.schema import Document
  from ..components import get_embeddings, pick_vectorstore, load_memories, pick_embedding
  from ..logging import log
  from ..database.uuid import generate_uuid_from_object_id
+ from ..utils import ConfigManager
 
  def embed_pubsub_chunk(data: dict):
      """Triggered from a message on a Cloud Pub/Sub topic "embed_chunk" topic
@@ -63,6 +64,9 @@ def embed_pubsub_chunk(data: dict):
          log.error(msg)
          return msg
 
+     config = ConfigManager(vector_name)
+     log.info(f"{config=}")
+
      log.info(f"Embedding: {vector_name} page_content: {page_content[:30]}...[{len(page_content)}] - {metadata}")
 
      if 'eventTime' not in metadata:
@@ -102,9 +106,9 @@ def embed_pubsub_chunk(data: dict):
      doc = Document(page_content=page_content, metadata=metadata)
 
      # init embedding and vector store
-     embeddings = get_embeddings(vector_name)
+     embeddings = get_embeddings(config=config)
 
-     memories = load_memories(vector_name)
+     memories = load_memories(config=config)
      vectorstore_list = []
      for memory in memories: # Iterate over the list
          for key, value in memory.items():
@@ -114,7 +118,7 @@ def embed_pubsub_chunk(data: dict):
              # check if vectorstore specific embedding is available
              embed_llm = value.get('llm')
              if embed_llm:
-                 embeddings = pick_embedding(embed_llm)
+                 embeddings = pick_embedding(embed_llm, config=config)
              # check if read only
              read_only = value.get('read_only')
              if read_only:
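
Taken together, the embedder hunks build a single `ConfigManager` per Pub/Sub message and thread it through the component helpers instead of re-resolving configuration from `vector_name` on every call. A condensed sketch of that flow (message parsing and vector store setup omitted; the vector name is illustrative):

```python
from sunholo.utils import ConfigManager
from sunholo.components import get_embeddings, load_memories, pick_embedding

vector_name = "my_vac"  # in embed_pubsub_chunk this comes from the Pub/Sub message

config = ConfigManager(vector_name)
embeddings = get_embeddings(config=config)
memories = load_memories(config=config)

for memory in memories:
    for key, value in memory.items():
        embed_llm = value.get('llm')
        if embed_llm:
            # per-vectorstore embedding override, still sharing the same config
            embeddings = pick_embedding(embed_llm, config=config)
```
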
sunholo/utils/config_class.py CHANGED
@@ -37,6 +37,10 @@ class ConfigManager:
          self.local_config_folder = local_config_folder
          self.configs_by_kind = self.load_all_configs()
 
+         test_agent = self.vacConfig("agent")
+         if not test_agent:
+             print(f"WARNING: No vacConfig.agent found for {self.vector_name} - are you in right folder? {local_config_folder=} {self.config_folder=}")
+
      def load_all_configs(self):
          """
          Load all configuration files from the specified directories into a dictionary.
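
The added constructor check is a diagnostic for a common failure mode: instantiating `ConfigManager` from a directory where the vacConfig cannot be found, so no `agent` entry exists for the vector name. A minimal sketch of what callers see (the vector name is illustrative):

```python
from sunholo.utils import ConfigManager

config = ConfigManager("my_vac")
# If the loaded vacConfig has no agent entry for "my_vac", 0.79.0 prints:
#   WARNING: No vacConfig.agent found for my_vac - are you in right folder? ...
# Construction otherwise proceeds as before, e.g.:
llm_name = config.vacConfig("llm")
```
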
@@ -121,7 +125,7 @@ class ConfigManager:
          self.config_cache[filename] = (config, datetime.now())
          log.debug(f"Loaded and cached {config_file}")
          if is_local:
-             log.warning(f"Local configuration override for {filename}")
+             log.warning(f"Local configuration override for {filename} via {self.local_config_folder}")
          return config
 
      def _check_and_reload_configs(self):
@@ -1,9 +1,9 @@
  Metadata-Version: 2.1
  Name: sunholo
- Version: 0.78.4
+ Version: 0.79.0
  Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
  Home-page: https://github.com/sunholo-data/sunholo-py
- Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.78.4.tar.gz
+ Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.79.0.tar.gz
  Author: Holosun ApS
  Author-email: multivac@sunholo.com
  License: Apache License, Version 2.0
@@ -31,7 +31,7 @@ sunholo/bots/github_webhook.py,sha256=5pQPRLM_wxxcILVaIzUDV8Kt7Arcm2dL1r1kMMHA52
  sunholo/bots/webapp.py,sha256=EIMxdAJ_xtufwJmvnn7N_Fb_1hZ9DjhJ0Kf_hp02vEU,1926
  sunholo/chunker/__init__.py,sha256=A5canS0XPgisHu0OZ7sVdILgEHGzgH9kpkDi4oBwLZk,135
  sunholo/chunker/azure.py,sha256=iZ0mXjei0cILsLuSUnZK0mmUUsQNiC3ZQr1iX8q5IeY,3263
- sunholo/chunker/doc_handling.py,sha256=rIyknpzDyj5A0u_DqSQVD_CXLRNZPOU6TCL4bhCdjOI,8563
+ sunholo/chunker/doc_handling.py,sha256=AV-HU4FePKsk1mPASc3XOhJrqwdxnvEKc0GSpPTswMA,8714
  sunholo/chunker/encode_metadata.py,sha256=SYHaqKcr4lCzwmrzUGhgX4_l4pzDv7wAeNCw7a461MA,1912
  sunholo/chunker/images.py,sha256=Xmh1vwHrVhoXm5iH2dhCc52O8YgdzE8KrDSdL-pGnp8,1861
  sunholo/chunker/loaders.py,sha256=xiToUVgPz2ZzcqpUAq7aNP3PTenb_rBUAFzu0JPycIg,10268
@@ -54,11 +54,11 @@ sunholo/cli/sun_rich.py,sha256=UpMqeJ0C8i0pkue1AHnnyyX0bFJ9zZeJ7HBR6yhuA8A,54
  sunholo/cli/swagger.py,sha256=absYKAU-7Yd2eiVNUY-g_WLl2zJfeRUNdWQ0oH8M_HM,1564
  sunholo/cli/vertex.py,sha256=8130YCarxHL1UC3aqblNmUwGZTXbkdL4Y_FOnZJsWiI,2056
  sunholo/components/__init__.py,sha256=IDoylb74zFKo6NIS3RQqUl0PDFBGVxM1dfUmO7OJ44U,176
- sunholo/components/llm.py,sha256=QTTpqUhfj7u9Ty9-E-XL8dpg4fp19z64FdRC1zbTHVo,10698
- sunholo/components/retriever.py,sha256=BFUw_6turT3CQJZWv_uXylmH5fHdb0gKfKJrQ_j6MGY,6533
+ sunholo/components/llm.py,sha256=XhSFuvthK35LDirX-zUbeLrLU8ccLSGxdJOOQovBGEM,11481
+ sunholo/components/retriever.py,sha256=F-wgZMpGJ8mGxJMAHA7HNgDwEhnvq1Pd6EGnTuBFlY8,6719
  sunholo/components/vectorstore.py,sha256=zUJ90L1S4IyxLB0JUWopeuwVjcsSqdhj1QreEfsJhsE,5548
  sunholo/database/__init__.py,sha256=Zz0Shcq-CtStf9rJGIYB_Ybzb8rY_Q9mfSj-nviM490,241
- sunholo/database/alloydb.py,sha256=d9W0pbZB0jTVIGF5OVaQ6kXHo-X3-6e9NpWNmV5e9UY,10464
+ sunholo/database/alloydb.py,sha256=ZZGDA6DBSoWouDFi69LvTT1DgiiBz3aSR6u1hFO-IZY,11520
  sunholo/database/alloydb_client.py,sha256=AYA0SSaBy-1XEfeZI97sMGehfrwnfbwZ8sE0exzI2E0,7254
  sunholo/database/database.py,sha256=UDHkceiEvJmS3esQX2LYEjEMrHcogN_JHuJXoVWCH3M,7354
  sunholo/database/lancedb.py,sha256=2rAbJVusMrm5TPtVTsUtmwn0z1iZ_wvbKhc6eyT6ClE,708
@@ -75,7 +75,7 @@ sunholo/discovery_engine/chunker_handler.py,sha256=fDqvXeXr58s6TB75MMIGKKEg42T21
  sunholo/discovery_engine/create_new.py,sha256=7oZG78T6lW0EspRzlo7-qRyXFSuFxDn2dfSAVEaqlqY,978
  sunholo/discovery_engine/discovery_engine_client.py,sha256=YYsFeaW41l8jmWCruQnYxJGKEYBZ7dduTBDhdxI63hQ,17719
  sunholo/embedder/__init__.py,sha256=sI4N_CqgEVcrMDxXgxKp1FsfsB4FpjoXgPGkl4N_u4I,44
- sunholo/embedder/embed_chunk.py,sha256=8BJ90tR0_JbCcsVCzrtPdZn6sVys0OhXSxLszlve_ko,6819
+ sunholo/embedder/embed_chunk.py,sha256=FFr5pDvFCsWNS5JnTjuf1aCpg4Qlut83wqndneavnj8,6944
  sunholo/gcs/__init__.py,sha256=SZvbsMFDko40sIRHTHppA37IijvJTae54vrhooEF5-4,90
  sunholo/gcs/add_file.py,sha256=m-iQeYAmdXxy2EJ1uMmM3gx-eKbTcNpfsAyRd4sL_hA,7120
  sunholo/gcs/download_folder.py,sha256=mfntDA3Gl-7quMK9_eSTWvUOY1330jF--1cb62C0K1E,1607
@@ -116,7 +116,7 @@ sunholo/utils/__init__.py,sha256=Hv02T5L2zYWvCso5hzzwm8FQogwBq0OgtUbN_7Quzqc,89
  sunholo/utils/api_key.py,sha256=Ct4bIAQZxzPEw14hP586LpVxBAVi_W9Serpy0BK-7KI,244
  sunholo/utils/big_context.py,sha256=gJIP7_ZL-YSLhOMq8jmFTMqH1wq8eB1NK7oKPeZAq2s,5578
  sunholo/utils/config.py,sha256=XOH2pIvHs6QLnCwVAn7RuyRyV10TfbCEXabSjuEhKdo,8947
- sunholo/utils/config_class.py,sha256=4fm2Bwn_zFhVJBiUnMBzfCA5LKhTcBMU3mzhf5seXrw,8553
+ sunholo/utils/config_class.py,sha256=GP58SfYYn32dSTUnyixKGujgF2DZRctc23ZhFRvDTZ8,8808
  sunholo/utils/config_schema.py,sha256=Wv-ncitzljOhgbDaq9qnFqH5LCuxNv59dTGDWgd1qdk,4189
  sunholo/utils/gcp.py,sha256=uueODEpA-P6O15-t0hmcGC9dONLO_hLfzSsSoQnkUss,4854
  sunholo/utils/gcp_project.py,sha256=0ozs6tzI4qEvEeXb8MxLnCdEVoWKxlM6OH05htj7_tc,1325
@@ -132,9 +132,9 @@ sunholo/vertex/init.py,sha256=uyg76EqS39jWJ2gxMqXOLWP6MQ2hc81wFdwgG86ZoCM,2868
  sunholo/vertex/memory_tools.py,sha256=pomHrDKqvY8MZxfUqoEwhdlpCvSGP6KmFJMVKOimXjs,6842
  sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
  sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
- sunholo-0.78.4.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
- sunholo-0.78.4.dist-info/METADATA,sha256=Ic8NbVh3Y5f0OZxVu18aB9QaI5PcIamIG3DDPzH6_6o,7348
- sunholo-0.78.4.dist-info/WHEEL,sha256=Z4pYXqR_rTB7OWNDYFOm1qRk0RX6GFP2o8LgvP453Hk,91
- sunholo-0.78.4.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
- sunholo-0.78.4.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
- sunholo-0.78.4.dist-info/RECORD,,
+ sunholo-0.79.0.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
+ sunholo-0.79.0.dist-info/METADATA,sha256=-sMhBNEnku2Fz06P5Ihk8N6KP-VlQ37zCidev8zEMjU,7348
+ sunholo-0.79.0.dist-info/WHEEL,sha256=Rp8gFpivVLXx-k3U95ozHnQw8yDcPxmhOpn_Gx8d5nc,91
+ sunholo-0.79.0.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
+ sunholo-0.79.0.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
+ sunholo-0.79.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (70.3.0)
+ Generator: setuptools (72.0.0)
  Root-Is-Purelib: true
  Tag: py3-none-any
 