PyPI - sunholo - Versions diffs - 0.59.2__tar.gz → 0.59.4__tar.gz - Mend

sunholo 0.59.2.tar.gz → 0.59.4.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (106) hide show

{sunholo-0.59.2 → sunholo-0.59.4}/PKG-INFO RENAMED Viewed

@@ -1,9 +1,9 @@
 Metadata-Version: 2.1
 Name: sunholo
-Version: 0.59.2
+Version: 0.59.4
 Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
 Home-page: https://github.com/sunholo-data/sunholo-py
-Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.59.2.tar.gz
+Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.59.4.tar.gz
 Author: Holosun ApS
 Author-email: multivac@sunholo.com
 License: Apache License, Version 2.0

{sunholo-0.59.2 → sunholo-0.59.4}/setup.py RENAMED Viewed

@@ -1,7 +1,7 @@
 from setuptools import setup, find_packages
 # Define your base version
-version = '0.59.2'
+version = '0.59.4'
 setup(
     name='sunholo',

{sunholo-0.59.2 → sunholo-0.59.4}/sunholo/agents/dispatch_to_qa.py RENAMED Viewed

@@ -49,8 +49,8 @@ def prep_request_payload(user_input, chat_history, vector_name, stream, **kwargs
     # Add chat_history/vector_name to kwargs so langserve can use them too
     kwargs['chat_history'] = chat_history
-    agent = load_config_key("agent", vector_name=vector_name, filename="config/llm_config.yaml")
-    agent_type = load_config_key("agent_type", vector_name=vector_name, filename="config/llm_config.yaml")
+    agent = load_config_key("agent", vector_name=vector_name, kind="vacConfig")
+    agent_type = load_config_key("agent_type", vector_name=vector_name, kind="vacConfig")
     # {'stream': '', 'invoke': ''}
     endpoints = route_endpoint(vector_name)

{sunholo-0.59.2 → sunholo-0.59.4}/sunholo/agents/route.py RENAMED Viewed

@@ -16,12 +16,12 @@ from ..utils import load_config_key, load_config
 def route_qna(vector_name):
-    agent_url = load_config_key('agent_url', vector_name=vector_name, filename='config/llm_config.yaml')
+    agent_url = load_config_key('agent_url', vector_name=vector_name, kind="vacConfig")
     if agent_url:
         log.info('agent_url found in llm_config.yaml')
         return agent_url
-    agent = load_config_key('agent', vector_name, filename='config/llm_config.yaml')
+    agent = load_config_key('agent', vector_name, kind="vacConfig")
     log.info(f'agent_type: {agent}')
     agent_route, _ = load_config('config/cloud_run_urls.json')
@@ -37,9 +37,9 @@ def route_qna(vector_name):
 def route_endpoint(vector_name):
-    agent_type = load_config_key('agent_type', vector_name, filename='config/llm_config.yaml')
+    agent_type = load_config_key('agent_type', vector_name, kind="vacConfig")
     if not agent_type:
-        agent_type = load_config_key('agent', vector_name, filename='config/llm_config.yaml')
+        agent_type = load_config_key('agent', vector_name, kind="vacConfig")
     stem = route_qna(vector_name)

{sunholo-0.59.2 → sunholo-0.59.4}/sunholo/agents/special_commands.py RENAMED Viewed

@@ -41,14 +41,13 @@ def handle_special_commands(user_input,
                             vector_name,
                             chat_history,
                             bucket=None,
-                            cmds=None,
-                            config_file="config/llm_config.yaml"):
+                            cmds=None):
     now = datetime.datetime.now()
     hourmin = now.strftime("%H%M%S")
     the_datetime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
     if not cmds:
-        cmds = load_config_key("user_special_cmds", vector_name=vector_name, filename=config_file)
+        cmds = load_config_key("user_special_cmds", vector_name=vector_name, kind="vacConfig")
         if not cmds:
             return None

{sunholo-0.59.2 → sunholo-0.59.4}/sunholo/auth/run.py RENAMED Viewed

@@ -15,7 +15,7 @@ def get_run_url(vector_name=None):
     cloud_urls = route_qna(vector_name)
     cloud_urls, _ = load_config('config/cloud_run_urls.json')
-    agent = load_config_key("agent", vector_name=vector_name, filename="config/llm_config.yaml")
+    agent = load_config_key("agent", vector_name=vector_name, kind="vacConfig")
     try:
         log.info(f'Looking up URL for {agent}')

sunholo-0.59.4/sunholo/bots/github_webhook.py ADDED Viewed

@@ -0,0 +1,264 @@
+# from https://github.com/ray-project/docu-mentor
+import base64
+import httpx
+from dotenv import load_dotenv
+import jwt
+import os
+import time
+load_dotenv()
+APP_ID = os.environ.get("APP_ID")
+PRIVATE_KEY = os.environ.get("PRIVATE_KEY", "")
+# with open('private-key.pem', 'r') as f:
+#     PRIVATE_KEY = f.read()
+def generate_jwt():
+    payload = {
+        "iat": int(time.time()),
+        "exp": int(time.time()) + (10 * 60),
+        "iss": APP_ID,
+    }
+    if PRIVATE_KEY:
+        jwt_token = jwt.encode(payload, PRIVATE_KEY, algorithm="RS256")
+        return jwt_token
+    raise ValueError("PRIVATE_KEY not found.")
+async def get_installation_access_token(jwt, installation_id):
+    url = f"https://api.github.com/app/installations/{installation_id}/access_tokens"
+    headers = {
+        "Authorization": f"Bearer {jwt}",
+        "Accept": "application/vnd.github.v3+json",
+    }
+    async with httpx.AsyncClient() as client:
+        response = await client.post(url, headers=headers)
+        return response.json()["token"]
+def get_diff_url(pr):
+    """GitHub 302s to this URL."""
+    original_url = pr.get("url")
+    parts = original_url.split("/")
+    owner, repo, pr_number = parts[-4], parts[-3], parts[-1]
+    return f"https://patch-diff.githubusercontent.com/raw/{owner}/{repo}/pull/{pr_number}.diff"
+async def get_branch_files(pr, branch, headers):
+    original_url = pr.get("url")
+    parts = original_url.split("/")
+    owner, repo = parts[-4], parts[-3]
+    url = f"https://api.github.com/repos/{owner}/{repo}/git/trees/{branch}?recursive=1"
+    async with httpx.AsyncClient() as client:
+        response = await client.get(url, headers=headers)
+        tree = response.json().get('tree', [])
+        files = {}
+        for item in tree:
+            if item['type'] == 'blob':
+                file_url = item['url']
+                print(file_url)
+                file_response = await client.get(file_url, headers=headers)
+                content = file_response.json().get('content', '')
+                # Decode the base64 content
+                decoded_content = base64.b64decode(content).decode('utf-8')
+                files[item['path']] = decoded_content
+        return files
+async def get_pr_head_branch(pr, headers):
+    original_url = pr.get("url")
+    parts = original_url.split("/")
+    owner, repo, pr_number = parts[-4], parts[-3], parts[-1]
+    url = f"https://api.github.com/repos/{owner}/{repo}/pulls/{pr_number}"
+    async with httpx.AsyncClient() as client:
+        response = await client.get(url, headers=headers)
+        # Check if the response is successful
+        if response.status_code != 200:
+            print(f"Error: Received status code {response.status_code}")
+            print("Response body:", response.text)
+            return ''
+        # Safely get the 'ref'
+        data = response.json()
+        head_data = data.get('head', {})
+        ref = head_data.get('ref', '')
+        return ref
+def files_to_diff_dict(diff):
+    files_with_diff = {}
+    current_file = None
+    for line in diff.split("\n"):
+        if line.startswith("diff --git"):
+            current_file = line.split(" ")[2][2:]
+            files_with_diff[current_file] = {"text": []}
+        elif line.startswith("+") and not line.startswith("+++"):
+            files_with_diff[current_file]["text"].append(line[1:])
+    return files_with_diff
+def parse_diff_to_line_numbers(diff):
+    files_with_line_numbers = {}
+    current_file = None
+    line_number = 0
+    for line in diff.split("\n"):
+        if line.startswith("diff --git"):
+            current_file = line.split(" ")[2][2:]
+            files_with_line_numbers[current_file] = []
+            line_number = 0
+        elif line.startswith("@@"):
+            line_number = int(line.split(" ")[2].split(",")[0][1:]) - 1
+        elif line.startswith("+") and not line.startswith("+++"):
+            files_with_line_numbers[current_file].append(line_number)
+            line_number += 1
+        elif not line.startswith("-"):
+            line_number += 1
+    return files_with_line_numbers
+def get_context_from_files(files, files_with_line_numbers, context_lines=2):
+    context_data = {}
+    for file, lines in files_with_line_numbers.items():
+        file_content = files[file].split("\n")
+        context_data[file] = []
+        for line in lines:
+            start = max(line - context_lines, 0)
+            end = min(line + context_lines + 1, len(file_content))
+            context_data[file].append('\n'.join(file_content[start:end]))
+    return context_data
+app = FastAPI()
+async def handle_webhook(request: Request):
+    data = await request.json()
+    installation = data.get("installation")
+    if installation and installation.get("id"):
+        installation_id = installation.get("id")
+        logger.info(f"Installation ID: {installation_id}")
+        JWT_TOKEN = generate_jwt()
+        installation_access_token = await get_installation_access_token(
+            JWT_TOKEN, installation_id
+        )
+        headers = {
+            "Authorization": f"token {installation_access_token}",
+            "User-Agent": "docu-mentor-bot",
+            "Accept": "application/vnd.github.VERSION.diff",
+        }
+    else:
+        raise ValueError("No app installation found.")
+    # If PR exists and is opened
+    if "pull_request" in data.keys() and (
+        data["action"] in ["opened", "reopened"]
+    ):  # use "synchronize" for tracking new commits
+        pr = data.get("pull_request")
+        # Greet the user and show instructions.
+        async with httpx.AsyncClient() as client:
+            await client.post(
+                f"{pr['issue_url']}/comments",
+                json={"body": GREETING},
+                headers=headers,
+            )
+        return JSONResponse(content={}, status_code=200)
+    # Check if the event is a new or modified issue comment
+    if "issue" in data.keys() and data.get("action") in ["created", "edited"]:
+        issue = data["issue"]
+        # Check if the issue is a pull request
+        if "/pull/" in issue["html_url"]:
+            pr = issue.get("pull_request")
+            # Get the comment body
+            comment = data.get("comment")
+            comment_body = comment.get("body")
+            # Remove all whitespace characters except for regular spaces
+            comment_body = comment_body.translate(
+                str.maketrans("", "", string.whitespace.replace(" ", ""))
+            )
+            # Skip if the bot talks about itself
+            author_handle = comment["user"]["login"]
+            # Check if the bot is mentioned in the comment
+            if (
+                author_handle != "docu-mentor[bot]"
+                and "@docu-mentor run" in comment_body
+            ):
+                async with httpx.AsyncClient() as client:
+                    # Fetch diff from GitHub
+                    files_to_keep = comment_body.replace(
+                        "@docu-mentor run", ""
+                    ).split(" ")
+                    files_to_keep = [item for item in files_to_keep if item]
+                    logger.info(files_to_keep)
+                    url = get_diff_url(pr)
+                    diff_response = await client.get(url, headers=headers)
+                    diff = diff_response.text
+                    files_with_lines = parse_diff_to_line_numbers(diff)
+                    # Get head branch of the PR
+                    headers["Accept"] = "application/vnd.github.full+json"
+                    head_branch = await get_pr_head_branch(pr, headers)
+                    # Get files from head branch
+                    head_branch_files = await get_branch_files(pr, head_branch, headers)
+                    print("HEAD FILES", head_branch_files)
+                    # Enrich diff data with context from the head branch.
+                    context_files = get_context_from_files(head_branch_files, files_with_lines)
+                    # Filter the dictionary
+                    if files_to_keep:
+                        context_files = {
+                            k: context_files[k]
+                            for k in context_files
+                            if any(sub in k for sub in files_to_keep)
+                        }
+                    # Get suggestions from Docu Mentor
+                    content, model, prompt_tokens, completion_tokens = \
+                        ray_mentor(context_files) if ray.is_initialized() else mentor(context_files)
+                    # Let's comment on the PR
+                    await client.post(
+                        f"{comment['issue_url']}/comments",
+                        json={
+                            "body": f":rocket: Docu Mentor finished "
+                            + "analysing your PR! :rocket:\n\n"
+                            + "Take a look at your results:\n"
+                            + f"{content}\n\n"
+                            + "This bot is  powered by "
+                            + "[Sunholo Multivac](https://www.sunholo.com/).\n"
+                            + f"It used the model {model}, used {prompt_tokens} prompt tokens, "
+                            + f"and {completion_tokens} completion tokens in total."
+                        },
+                        headers=headers,
+                    )
+@serve.deployment(route_prefix="/")
+@serve.ingress(app)
+class ServeBot:
+    @app.get("/")
+    async def root(self):
+        return {"message": "Docu Mentor reporting for duty!"}
+    @app.post("/webhook/")
+    async def handle_webhook_route(self, request: Request):
+        return await handle_webhook(request)

{sunholo-0.59.2 → sunholo-0.59.4}/sunholo/chunker/doc_handling.py RENAMED Viewed

@@ -21,7 +21,7 @@ def send_doc_to_docstore(docs, vector_name):
     # docs all come from the same file but got split into a list of document objects
-    docstore_config = load_config_key("docstore", vector_name=vector_name, filename="config/llm_config.yaml")
+    docstore_config = load_config_key("docstore", vector_name=vector_name, kind="vacConfig")
     if docstore_config is None:
         log.info(f"No docstore config found for {vector_name} ")
@@ -110,7 +110,7 @@ def summarise_docs(docs, vector_name, summary_threshold_default=10000, model_lim
     if not docs:
         return None
-    chunker_config = load_config_key("chunker", vector_name=vector_name, filename="config/llm_config.yaml")
+    chunker_config = load_config_key("chunker", vector_name=vector_name, kind="vacConfig")
     summarise_chunking_config = chunker_config.get("summarise") if chunker_config else None
     if not summarise_chunking_config:

{sunholo-0.59.2 → sunholo-0.59.4}/sunholo/chunker/splitter.py RENAMED Viewed

@@ -114,7 +114,7 @@ def choose_splitter(extension: str, chunk_size: int=1024, chunk_overlap:int=200,
     if vector_name:
         # check if there is a chunking configuration
         from ..utils import load_config_key
-        chunk_config = load_config_key("chunker", vector_name=vector_name, filename="config/llm_config.yaml")
+        chunk_config = load_config_key("chunker", vector_name=vector_name, kind="vacConfig")
         if chunk_config:
             if chunk_config.get("type") == "semantic":
                 embedding_str = chunk_config.get("llm")

{sunholo-0.59.2 → sunholo-0.59.4}/sunholo/components/retriever.py RENAMED Viewed

@@ -27,7 +27,7 @@ from langchain.retrievers import ContextualCompressionRetriever
 def load_memories(vector_name):
-    memories = load_config_key("memory", vector_name, type="vacConfig")
+    memories = load_config_key("memory", vector_name, kind="vacConfig")
     log.info(f"Found memory settings for {vector_name}: {memories}")
     if len(memories) == 0:
         log.info(f"No memory settings found for {vector_name}")
@@ -70,7 +70,7 @@ def pick_retriever(vector_name, embeddings=None):
         log.info(f"No retrievers were created for {memories}")
         return None
-    k_override = load_config_key("memory_k", vector_name, type="vacConfig")
+    k_override = load_config_key("memory_k", vector_name, kind="vacConfig")
     if not k_override:
         k_override = 3

{sunholo-0.59.2 → sunholo-0.59.4}/sunholo/database/alloydb.py RENAMED Viewed

@@ -19,7 +19,7 @@ def create_alloydb_engine(vector_name):
     alloydb_config = load_config_key(
         'alloydb_config',
         vector_name=vector_name,
-        filename = "config/llm_config.yaml"
+        kind="vacConfig"
     )
     if alloydb_config is None:

{sunholo-0.59.2 → sunholo-0.59.4}/sunholo/database/database.py RENAMED Viewed

@@ -48,16 +48,16 @@ def lookup_connection_env(vs_str):
     raise ValueError("Could not find vectorstore for {vs_str}")
-def get_vector_size(vector_name: str, config_file:str="config/llm_config.yaml"):
+def get_vector_size(vector_name: str):
     llm_str = None
-    embed_dict = load_config_key("embedder", vector_name, filename=config_file)
+    embed_dict = load_config_key("embedder", vector_name, kind="vacConfig")
     if embed_dict:
         llm_str = embed_dict.get('llm')
     if llm_str is None:
-        llm_str = load_config_key("llm", vector_name, filename=config_file)
+        llm_str = load_config_key("llm", vector_name, kind="vacConfig")
     if not isinstance(llm_str, str):
         raise ValueError(f"get_vector_size() did not return a value string for {vector_name} - got {llm_str} instead")

{sunholo-0.59.2 → sunholo-0.59.4}/sunholo/llamaindex/import_files.py RENAMED Viewed

@@ -102,7 +102,7 @@ def do_llamaindex(message_data, metadata, vector_name):
     if not rag:
         raise ValueError("Need to install vertexai module via `pip install sunholo[gcp]`")
-    gcp_config = load_config_key("gcp_config", vector_name=vector_name, type="vacConfig")
+    gcp_config = load_config_key("gcp_config", vector_name=vector_name, kind="vacConfig")
     if not gcp_config:
         raise ValueError(f"Need config.{vector_name}.gcp_config to configure llamaindex on VertexAI")
@@ -120,7 +120,7 @@ def do_llamaindex(message_data, metadata, vector_name):
     log.info(f"Found llamaindex corpus: {corpus}")
     # native support for cloud storage and drive links
-    chunker_config = load_config_key("chunker", vector_name=vector_name, type="vacConfig")
+    chunker_config = load_config_key("chunker", vector_name=vector_name, kind="vacConfig")
     if message_data.startswith("gs://") or message_data.startswith("https://drive.google.com"):
         log.info(f"rag.import_files for {message_data}")
@@ -160,7 +160,7 @@ def do_llamaindex(message_data, metadata, vector_name):
         #)
 def check_llamaindex_in_memory(vector_name):
-    memories = load_config_key("memory", vector_name=vector_name, type="vacConfig")
+    memories = load_config_key("memory", vector_name=vector_name, kind="vacConfig")
     for memory in memories:  # Iterate over the list
         for key, value in memory.items():  # Now iterate over the dictionary
             log.info(f"Found memory {key}")
@@ -175,7 +175,7 @@ def check_llamaindex_in_memory(vector_name):
 def llamaindex_chunker_check(message_data, metadata, vector_name):
     # llamaindex handles its own chunking/embedding
-    memories = load_config_key("memory", vector_name=vector_name, type="vacConfig")
+    memories = load_config_key("memory", vector_name=vector_name, kind="vacConfig")
     total_memories = len(memories)
     llama = None
     if check_llamaindex_in_memory(vector_name):

{sunholo-0.59.2 → sunholo-0.59.4}/sunholo/streaming/streaming.py RENAMED Viewed

@@ -248,8 +248,8 @@ def generate_proxy_stream(stream_to_f, user_input, vector_name, chat_history, ge
         ):
             print(output)  # Process each streaming output chunk
     """
-    agent = load_config_key("agent", vector_name=vector_name, filename="config/llm_config.yaml")
-    agent_type = load_config_key("agent_type", vector_name=vector_name, filename="config/llm_config.yaml")
+    agent = load_config_key("agent", vector_name=vector_name, kind="vacConfig")
+    agent_type = load_config_key("agent_type", vector_name=vector_name, kind="vacConfig")
     def generate():
         json_buffer = ""
@@ -306,8 +306,8 @@ async def generate_proxy_stream_async(stream_to_f, user_input, vector_name, chat
         ):
             print(output)  # Process each streaming output chunk
     """
-    agent = load_config_key("agent", vector_name=vector_name, type = "vacConfig")
-    agent_type = load_config_key("agent_type", vector_name=vector_name, type = "vacConfig")
+    agent = load_config_key("agent", vector_name=vector_name, kind="vacConfig")
+    agent_type = load_config_key("agent_type", vector_name=vector_name, kind="vacConfig")
     async def generate():
         json_buffer = ""

{sunholo-0.59.2 → sunholo-0.59.4}/sunholo/utils/config.py RENAMED Viewed

@@ -167,14 +167,13 @@ def load_config(filename: str=None) -> tuple[dict, str]:
     return config, filename
-def load_config_key(key: str, vector_name: str, filename: str=None, kind: str=None):
+def load_config_key(key: str, vector_name: str, kind: str=None):
     """
     Load a specific key from a configuration file.
     Args:
         key (str): The key to fetch from the configuration.
         vector_name (str): The name of the vector in the configuration file.
-        filename (str, optional): The configuration file name. Defaults to the `_CONFIG_FILE` environment variable. Deprecated - use 'kind' instead
         kind: (str, optional): Specify the type of configuration to retrieve e.g. 'vacConfig' which will pick from files within `_CONFIG_FOLDER`
     Returns:
@@ -182,7 +181,7 @@ def load_config_key(key: str, vector_name: str, filename: str=None, kind: str=No
     Example:
     ```python
-    api_url = load_config_key('apiUrl', 'myVector', 'config.yaml')
+    api_url = load_config_key('apiUrl', 'myVector', kind="vacConfig")
     print(f'API URL: {api_url}')
     ```
     """
@@ -197,9 +196,6 @@ def load_config_key(key: str, vector_name: str, filename: str=None, kind: str=No
     if kind:
         log.info(f"Got kind: {kind} - applying to configs")
-    if filename:
-        log.warning(f"Got filename argument: {filename} for config - deprecated - use `kind='vacConfig'` instead")
     if not configs_by_kind:
         log.warning("Did not load configs via folder")

{sunholo-0.59.2 → sunholo-0.59.4}/sunholo/utils/config_schema.py RENAMED Viewed

@@ -52,7 +52,8 @@ VAC_SUBCONFIG_SCHEMA = {
                 "cluster": {"type": "string"},
                 "instance": {"type": "string"},
                 "database": {"type": "string"}
-            }
+            },
+            "required": ["project_id", "region", "cluster", "instance", "database"]
         },
         "secrets": {
             "type": "array",
@@ -87,7 +88,7 @@ VAC_CONFIG_SCHEMA = {
             }
         }
     },
-    "required": ["kind", "apiVersion", "gcp_config", "vac"]
+    "required": ["kind", "apiVersion", "vac"]
 }
 PROMPT_CONFIG_SCHEMA = {

{sunholo-0.59.2 → sunholo-0.59.4}/sunholo.egg-info/PKG-INFO RENAMED Viewed

@@ -1,9 +1,9 @@
 Metadata-Version: 2.1
 Name: sunholo
-Version: 0.59.2
+Version: 0.59.4
 Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
 Home-page: https://github.com/sunholo-data/sunholo-py
-Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.59.2.tar.gz
+Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.59.4.tar.gz
 Author: Holosun ApS
 Author-email: multivac@sunholo.com
 License: Apache License, Version 2.0

{sunholo-0.59.2 → sunholo-0.59.4}/sunholo.egg-info/SOURCES.txt RENAMED Viewed

@@ -31,6 +31,7 @@ sunholo/auth/__init__.py
 sunholo/auth/run.py
 sunholo/bots/__init__.py
 sunholo/bots/discord.py
+sunholo/bots/github_webhook.py
 sunholo/bots/webapp.py
 sunholo/chunker/__init__.py
 sunholo/chunker/data_to_embed_pubsub.py