PyPI - sunholo - Versions diffs - 0.72.0__py3-none-any.whl → 0.73.3__py3-none-any.whl - Mend

sunholo 0.72.0__py3-none-any.whl → 0.73.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24)

sunholo/agents/dispatch_to_qa.py +10 -7
sunholo/agents/langserve.py +1 -1
sunholo/agents/route.py +24 -9
sunholo/cli/chat_vac.py +119 -67
sunholo/cli/cli.py +3 -3
sunholo/cli/embedder.py +2 -1
sunholo/components/retriever.py +2 -2
sunholo/gcs/add_file.py +19 -11
sunholo/invoke/__init__.py +1 -0
sunholo/invoke/invoke_vac_utils.py +151 -0
sunholo/langfuse/prompts.py +9 -3
sunholo/llamaindex/import_files.py +8 -7
sunholo/streaming/langserve.py +4 -1
sunholo/utils/config.py +1 -1
sunholo/utils/config_class.py +21 -9
sunholo/vertex/extensions_class.py +179 -64
sunholo/vertex/memory_tools.py +1 -1
{sunholo-0.72.0.dist-info → sunholo-0.73.3.dist-info}/METADATA +4 -3
{sunholo-0.72.0.dist-info → sunholo-0.73.3.dist-info}/RECORD +23 -22
sunholo/vertex/extensions.py +0 -326
{sunholo-0.72.0.dist-info → sunholo-0.73.3.dist-info}/LICENSE.txt +0 -0
{sunholo-0.72.0.dist-info → sunholo-0.73.3.dist-info}/WHEEL +0 -0
{sunholo-0.72.0.dist-info → sunholo-0.73.3.dist-info}/entry_points.txt +0 -0
{sunholo-0.72.0.dist-info → sunholo-0.73.3.dist-info}/top_level.txt +0 -0

sunholo/agents/dispatch_to_qa.py CHANGED Viewed

@@ -12,7 +12,7 @@
 #   See the License for the specific language governing permissions and
 #   limitations under the License.
 from ..logging import log
-from ..utils import load_config_key
+from ..utils import ConfigManager
 from ..auth import get_header
 import requests
 import aiohttp
@@ -46,26 +46,29 @@ def prep_request_payload(user_input, chat_history, vector_name, stream, **kwargs
     ```
     """
+    config = ConfigManager(vector_name)
     # Add chat_history/vector_name to kwargs so langserve can use them too
     kwargs['chat_history'] = chat_history
-    agent = load_config_key("agent", vector_name=vector_name, kind="vacConfig")
-    agent_type = load_config_key("agent_type", vector_name=vector_name, kind="vacConfig")
+    agent = config.vacConfig("agent")
+    agent_type = config.vacConfig("agent_type")
     override_endpoint = kwargs.get("override_endpoint")
     if override_endpoint:
         log.info(f"Overriding endpoint with {override_endpoint}")
     # {'stream': '', 'invoke': ''}
-    endpoints = route_endpoint(vector_name, override_endpoint=override_endpoint)
+    post_endpoints = route_endpoint(override_endpoint=override_endpoint, config=config)
     if stream:
-        qna_endpoint = endpoints["stream"]
+        qna_endpoint = post_endpoints["stream"]
     else:
-        qna_endpoint = endpoints["invoke"]
+        qna_endpoint = post_endpoints["invoke"]
     if agent == "langserve" or agent_type == "langserve":
-        qna_data = prepare_request_data(user_input, endpoints["input_schema"], vector_name, **kwargs)
+        get_endpoints = route_endpoint(override_endpoint=override_endpoint, method = 'get', config=config)
+        qna_data = prepare_request_data(user_input, get_endpoints["input_schema"], vector_name, **kwargs)
     else:
         # Base qna_data dictionary
         qna_data = {

sunholo/agents/langserve.py CHANGED Viewed

@@ -97,6 +97,6 @@ def prepare_request_data(user_input, endpoint, vector_name, **kwargs):
         return request_data
     else:
-        log.error("Invalid or no input schema available.")
+        log.error(f"Invalid or no input schema available for {endpoint=} {input_schema=}")
         return None

sunholo/agents/route.py CHANGED Viewed

@@ -12,18 +12,24 @@
 #   See the License for the specific language governing permissions and
 #   limitations under the License.
 from ..logging import log
-from ..utils import load_config_key, load_config
+from ..utils import load_config, ConfigManager
-def route_vac(vector_name: str) -> str :
+def route_vac(vector_name: str=None, config=None) -> str :
     """
     Considers what VAC this vector_name belongs to
     """
-    agent_url = load_config_key('agent_url', vector_name=vector_name, kind="vacConfig")
+    if not vector_name and not config:
+        raise ValueError("Must provide config or vector_name argument")
+    if not config:
+        config = ConfigManager(vector_name)
+    agent_url = config.vacConfig('agent_url')
     if agent_url:
         log.info('agent_url found in llm_config.yaml')
         return agent_url
-    agent = load_config_key('agent', vector_name, kind="vacConfig")
+    agent = config.vacConfig('agent')
     log.info(f'agent_type: {agent}')
     agent_route, _ = load_config('config/cloud_run_urls.json')
@@ -37,15 +43,24 @@ def route_vac(vector_name: str) -> str :
     log.info(f'agent_url: {agent_url}')
     return agent_url
-def route_endpoint(vector_name, method = 'post', override_endpoint=None):
+def route_endpoint(vector_name=None, method = 'post', override_endpoint=None, config=None):
+    if vector_name is None and config is None:
+        raise ValueError('vector_name and config can not both be None')
+    if config:
+        vector_name = config.vector_name
+    if not config:
+        config = ConfigManager(vector_name)
-    agent_type = load_config_key('agent_type', vector_name, kind="vacConfig")
+    agent_type = config.vacConfig('agent_type')
     if not agent_type:
-        agent_type = load_config_key('agent', vector_name, kind="vacConfig")
+        agent_type = config.vacConfig('agent')
-    stem = route_vac(vector_name) if not override_endpoint else override_endpoint
+    stem = route_vac(config=config) if not override_endpoint else override_endpoint
-    agents_config = load_config_key(agent_type, vector_name, kind="agentConfig")
+    agents_config = config.agentConfig(agent_type)
     log.info(f"agents_config: {agents_config}")
     if method not in agents_config:

sunholo/cli/chat_vac.py CHANGED Viewed

@@ -1,19 +1,21 @@
 from ..agents import send_to_qa, handle_special_commands
 from ..streaming import generate_proxy_stream, can_agent_stream
 from ..utils.user_ids import generate_user_id
-from ..utils.config import load_config_key
+from ..utils import ConfigManager
 from ..utils.api_key import has_multivac_api_key
 from ..logging import log
 from ..qna.parsers import parse_output
 from ..gcs.add_file import add_file_to_gcs
 from .run_proxy import clean_proxy_list, start_proxy, stop_proxy
+from ..invoke import invoke_vac
+from ..utils.big_context import has_text_extension, merge_text_files, load_gitignore_patterns, build_file_tree
+import tempfile
 import uuid
 import os
 import sys
 import subprocess
 import json
-import requests
 from pathlib import Path
 from rich import print
@@ -24,13 +26,62 @@ from rich.panel import Panel
 from rich.text import Text
 from rich.table import Table
+def read_and_add_to_user_input(user_input):
+    read_input = None
+    path = user_input.split(" ", 1)[1] if " " in user_input else None
+    if not path:
+        console.print("[bold red]Please provide a valid file or folder path.[/bold red]")
+        return None
+    if os.path.isfile(path):
+        if not has_text_extension(path):
+            console.print("[bold red]Unsupported file type. Please provide a text file or preprocess to text, or use !upload (e.g. images) or `sunholo embed`.[/bold red]")
+            return None
+        try:
+            with open(path, 'r', encoding='utf-8') as file:
+                file_content = file.read()
+            read_input = file_content
+            console.print(f"[bold yellow]File content from {path} read into user_input: [{len(read_input.split())}] words[/bold yellow]")
+        except FileNotFoundError:
+            console.print("[bold red]File not found. Please check the path and try again.[/bold red]")
+            return None
+        except IOError:
+            console.print("[bold red]File could not be read. Please ensure it is a readable text file.[/bold red]")
+            return None
+    elif os.path.isdir(path):
+        patterns = []
+        gitignore_path = os.path.join(path, '.gitignore')
+        if os.path.exists(gitignore_path):
+            patterns = load_gitignore_patterns(gitignore_path)
+        try:
+            with tempfile.NamedTemporaryFile(delete=False, mode='w+', encoding='utf-8') as temp_file:
+                temp_file_path = temp_file.name
+                file_tree = merge_text_files(path, temp_file_path, patterns)
+                console.print(f"[bold yellow]Contents of the folder '{path}' have been merged add added to input.[/bold yellow]")
+                console.print("\n".join(file_tree))
+                temp_file.seek(0)
+                read_input = temp_file.read()
+                console.print(f"[bold yellow]Total words: [{len(read_input.split())}] - watch out for high token costs! Use !clear_read to reset[/bold yellow]")
+            os.remove(temp_file_path)  # Clean up the temporary file
+        except Exception as e:
+            console.print(f"[bold red]An error occurred while reading the folder: {str(e)}[/bold red]")
+            return None
+    else:
+        console.print("[bold red]The provided path is neither a file nor a folder. Please check the path and try again.[/bold red]")
+        return None
+    return read_input
 def get_service_url(vac_name, project, region, no_config=False):
     if no_config:
         agent_name = vac_name
     else:
-        agent_name = load_config_key("agent", vac_name, kind="vacConfig")
+        agent_name = ConfigManager(vac_name).vacConfig("agent")
     proxies = clean_proxy_list()
     if agent_name in proxies:
@@ -50,7 +101,7 @@ def handle_file_upload(file, vector_name):
     if not Path(file).is_file():
         return None
-    agent_name = load_config_key("agent", vector_name, kind="vacConfig")
+    agent_name = ConfigManager(vector_name).vacConfig("agent")
     # vertex can't handle directories
     bucket_filepath = f"{vector_name}/uploads/{os.path.basename(file)}" if agent_name != "vertex-genai" else os.path.basename(file)
@@ -65,7 +116,10 @@ def stream_chat_session(service_url, service_name, stream=True):
     user_id = generate_user_id()
     chat_history = []
-    agent_name = load_config_key("agent", service_name, kind="vacConfig")
+    agent_name = ConfigManager(service_name).vacConfig("agent")
+    file_reply = None
+    read_file = None
+    read_file_count = None
     while True:
         session_id = str(uuid.uuid4())
         user_input = Prompt.ask("[bold cyan]You[/bold cyan]")
@@ -80,9 +134,26 @@ def stream_chat_session(service_url, service_name, stream=True):
         if special_reply:
              console.print(f"[bold yellow]{service_name}:[/bold yellow] {special_reply}", end='\n')
-             continue
-        if user_input.lower().startswith("upload"):
+             continue
+        if user_input.lower().startswith("!read"):
+            read_file = read_and_add_to_user_input(user_input)
+            if read_file:
+                read_file_count = len(read_file.split())
+            continue
+        if user_input.lower().startswith("!ls"):
+            items = os.listdir(os.getcwd())
+            for item in items:
+                console.print(item)
+            continue
+        if user_input.lower().startswith("!tree"):
+            tree = build_file_tree(os.getcwd(), patterns=[])
+            console.print(tree)
+            continue
+        if user_input.lower().startswith("!upload"):
             file_path = user_input.split(" ", 1)[1] if " " in user_input else None
             if not file_path:
                 console.print("[bold red]Please provide a valid file path.[/bold red]")
@@ -94,7 +165,7 @@ def stream_chat_session(service_url, service_name, stream=True):
                     console.print("[bold red]Invalid file upload[/bold red]")
                     continue
-                console.print(f"[bold yellow]{service_name}:[/bold yellow] Uploaded {file_path} to {file_reply} - image will be sent each reply until you issue 'clear_upload' ", end='\n')
+                console.print(f"[bold yellow]{service_name}:[/bold yellow] Uploaded {file_path} to {file_reply} - image will be sent each reply until you issue '!clear_upload' ", end='\n')
             except FileNotFoundError:
                 console.print("[bold red]File not found. Please check the path and try again.[/bold red]")
@@ -102,10 +173,25 @@ def stream_chat_session(service_url, service_name, stream=True):
             # file_reply stays for each message from now on
             continue
-        if user_input.lower().startswith("clear_upload"):
+        if user_input.lower().startswith("!clear_upload"):
             console.print("[bold yellow]File upload path cleared.[/bold yellow]")
             file_path = None
+            continue
+        if user_input.lower().startswith("!clear_read"):
+            console.print("[bold yellow]Read in file(s) cleared.[/bold yellow]")
+            read_file = None
+            read_file_count = None
+            continue
+        if read_file:
+            user_input = f"<user added file>{read_file}</user added file>\n{user_input}"
+        # guardrail
+        if len(user_input)> 1000000:
+            console.print("[bold red]Over 1 million characters in user_input, aborting as probably unintentional. Use API directly instead.[/bold red]")
+            continue
         if not stream:
             vac_response = send_to_qa(user_input,
                 vector_name=service_name,
@@ -165,8 +251,15 @@ def stream_chat_session(service_url, service_name, stream=True):
             response_started = False
             vac_response = ""
-            # point or star?
-            with console.status(f"[bold orange]Thinking...{file_reply}[/bold orange]", spinner="star") as status:
+            thinking = "[bold orange]Thinking...[/bold orange]"
+            if file_reply:
+                thinking = f"[bold orange]Thinking with upload {file_reply} - issue !clear_upload to remove...[/bold orange]"
+            if read_file:
+                thinking = f"{thinking} - [bold orange]additional [{read_file_count}] words added via !read_file contents - issue !clear_read to remove[/bold orange]"
+            with console.status(thinking, spinner="star") as status:
                 for token in stream_response():
                     if not response_started:
                         status.stop()
@@ -274,15 +367,19 @@ def resolve_service_url(args, no_config=False):
         return args.url_override
-    agent_name = load_config_key("agent", args.vac_name, kind="vacConfig")
-    agent_url = load_config_key("agent_url", args.vac_name, "vacConfig")
+    config = ConfigManager(args.vac_name)
+    global_config = ConfigManager("global")
+    agent_name = config.vacConfig("agent")
+    agent_url = config.vacConfig("agent_url")
     if agent_url:
         console.print("Found agent_url within vacConfig: {agent_url}")
     # via public cloud endpoints - assumes no gcloud auth
     if has_multivac_api_key():
         log.debug("Found MULTIVAC_API_KEY")
-        gcp_config = load_config_key("gcp_config", "global", "vacConfig")
+        gcp_config = global_config.vacConfig("gcp_config")
         endpoints_base_url = gcp_config.get("endpoints_base_url")
         if not endpoints_base_url:
             console.print("[bold red]MULTIVAC_API_KEY env var is set but no config.gcp_config.endpoints_base_url can be found[/bold red]")
@@ -310,6 +407,8 @@ def resolve_service_url(args, no_config=False):
 def vac_command(args):
+    config = ConfigManager(args.vac_name)
     if args.action == 'list':
         list_cloud_run_services(args.project, args.region)
@@ -324,7 +423,7 @@ def vac_command(args):
     elif args.action == 'chat':
         service_url = resolve_service_url(args)
-        agent_name   = load_config_key("agent", args.vac_name, kind="vacConfig")
+        agent_name   = config.vacConfig("agent")
         streamer = can_agent_stream(agent_name)
         log.debug(f"streamer: {streamer}")
@@ -334,9 +433,10 @@ def vac_command(args):
         if args.headless:
             headless_mode(service_url, args.vac_name, args.user_input, args.chat_history, stream=streamer)
         else:
-            display_name = load_config_key("display_name", vector_name=args.vac_name,  kind="vacConfig")
-            description  = load_config_key("description", vector_name=args.vac_name, kind="vacConfig")
-            endpoints_config = load_config_key(agent_name, "dummy_value", kind="agentConfig")
+            display_name = config.vacConfig("display_name")
+            description  = config.vacConfig("description")
+            endpoints_config = config.agentConfig(agent_name)
             post_endpoints = endpoints_config['post']
             display_endpoints = ' '.join(f"{key}: {value}" for key, value in post_endpoints.items())
@@ -362,54 +462,6 @@ def vac_command(args):
         invoke_vac(service_url, args.data, is_file=args.is_file)
-def invoke_vac(service_url, data, vector_name=None, metadata=None, is_file=False):
-    try:
-        if is_file:
-            console.print("Uploading file...")
-            # Handle file upload
-            if not isinstance(data, Path) or not data.is_file():
-                raise ValueError("For file uploads, 'data' must be a Path object pointing to a valid file.")
-            files = {
-                'file': (data.name, open(data, 'rb')),
-            }
-            form_data = {
-                'vector_name': vector_name,
-                'metadata': json.dumps(metadata) if metadata else '',
-            }
-            response = requests.post(service_url, files=files, data=form_data)
-        else:
-            console.print("Uploading JSON...")
-            try:
-                if isinstance(data, dict):
-                    json_data = data
-                else:
-                    json_data = json.loads(data)
-            except json.JSONDecodeError as err:
-                console.print(f"[bold red]ERROR: invalid JSON: {str(err)} [/bold red]")
-                sys.exit(1)
-            except Exception as err:
-                console.print(f"[bold red]ERROR: could not parse JSON: {str(err)} [/bold red]")
-                sys.exit(1)
-            log.debug(f"Sending data: {data} or json_data: {json.dumps(json_data)}")
-            # Handle JSON data
-            headers = {"Content-Type": "application/json"}
-            response = requests.post(service_url, headers=headers, data=json.dumps(json_data))
-        response.raise_for_status()
-        the_data = response.json()
-        console.print(the_data)
-        return the_data
-    except requests.exceptions.RequestException as e:
-        console.print(f"[bold red]ERROR: Failed to invoke VAC: {e}[/bold red]")
-    except Exception as e:
-        console.print(f"[bold red]ERROR: An unexpected error occurred: {e}[/bold red]")
 def list_cloud_run_services(project, region):
     """

sunholo/cli/cli.py CHANGED Viewed

@@ -10,7 +10,7 @@ from .chat_vac import setup_vac_subparser
 from .embedder import setup_embedder_subparser
 from .swagger import setup_swagger_subparser
-from ..utils.config import load_config_key
+from ..utils import ConfigManager
 from ..logging import log
@@ -20,9 +20,9 @@ from rich.panel import Panel
 def load_default_gcp_config():
     try:
-        gcp_config = load_config_key('gcp_config', 'global', kind="vacConfig")
+        gcp_config = ConfigManager("global").vacConfig("gcp_config")
     except FileNotFoundError as e:
-        console.print(f"{e} - move config/ folder to working directory or set the _CONFIG_FOLDER environment variable to its location")
+        console.print(f"{e} - move config/ folder to working directory or set the VAC_CONFIG_FOLDER environment variable to its location")
         sys.exit(1)
     if gcp_config:

sunholo/cli/embedder.py CHANGED Viewed

@@ -8,7 +8,8 @@ from pathlib import Path
 from .sun_rich import console
 from rich.progress import Progress
-from .chat_vac import resolve_service_url, invoke_vac
+from ..invoke import invoke_vac
+from .chat_vac import resolve_service_url
 from .run_proxy import stop_proxy
 def create_metadata(vac, metadata):

sunholo/components/retriever.py CHANGED Viewed

@@ -13,7 +13,7 @@
 #   limitations under the License.
 from ..logging import log
 from .vectorstore import pick_vectorstore
-from ..utils import load_config_key
+from ..utils import load_config_key, ConfigManager
 from .llm import get_embeddings
 from ..utils.gcp_project import get_gcp_project
@@ -27,7 +27,7 @@ from langchain.retrievers import ContextualCompressionRetriever
 def load_memories(vector_name):
-    memories = load_config_key("memory", vector_name, kind="vacConfig")
+    memories = ConfigManager(vector_name).vacConfig("memory")
     log.info(f"Found memory settings for {vector_name}: {memories}")
     if not memories or len(memories) == 0:
         log.info(f"No memory settings found for {vector_name}")

sunholo/gcs/add_file.py CHANGED Viewed

@@ -22,7 +22,7 @@ except ImportError:
     storage = None
 from ..logging import log
-from ..utils.config import load_config_key
+from ..utils import load_config_key, ConfigManager
 def handle_base64_image(base64_data: str, vector_name: str, extension: str):
@@ -37,7 +37,8 @@ def handle_base64_image(base64_data: str, vector_name: str, extension: str):
     Returns:
         Tuple[str, str]: The URI of the uploaded image and the MIME type.
     """
-    model = load_config_key("llm", vector_name, "vacConfig")
+    model = ConfigManager(vector_name).vacConfig("llm")
     if model.startswith("openai"):  # pass it to gpt directly
         return base64_data, base64_data.split(",", 1)
@@ -69,16 +70,19 @@ def handle_base64_image(base64_data: str, vector_name: str, extension: str):
 def resolve_bucket(vector_name):
     if os.getenv('EXTENSIONS_BUCKET'):
+        log.warning('Resolving to EXTENSIONS_BUCKET environment variable')
         return os.getenv('EXTENSIONS_BUCKET')
-    bucket_config = load_config_key("upload", vector_name, "vacConfig")
-    if bucket_config:
-        if bucket_config.get("buckets"):
-            bucket_name = bucket_config.get("buckets").get("all")
-    else:
-        bucket_name = os.getenv('GCS_BUCKET')
-        if not bucket_name:
-            raise ValueError("No bucket found to upload to: GCS_BUCKET returned None")
+    if vector_name:
+        bucket_config = ConfigManager(vector_name).vacConfig("upload")
+        if bucket_config:
+            if bucket_config.get("buckets"):
+                bucket_name = bucket_config.get("buckets").get("all")
+    bucket_name = bucket_name or os.getenv('GCS_BUCKET')
+    if not bucket_name:
+        raise ValueError("No bucket found to upload to: GCS_BUCKET returned None")
     if bucket_name.startswith("gs://"):
         bucket_name = bucket_name.removeprefix("gs://")
@@ -86,7 +90,7 @@ def resolve_bucket(vector_name):
     return bucket_name
 def add_file_to_gcs(filename: str,
-                    vector_name:str,
+                    vector_name:str=None,
                     bucket_name: str=None,
                     metadata:dict=None,
                     bucket_filepath:str=None):
@@ -114,7 +118,11 @@ def add_file_to_gcs(filename: str,
     if os.getenv('EXTENSIONS_BUCKET'):
         bucket_filepath = os.path.basename(filename)
+    if vector_name is None:
+            vector_name = "global"
     if not bucket_filepath:
         bucket_filepath = f"{vector_name}/{year}/{month}/{day}/{hour}/{os.path.basename(filename)}"
     bucket_filepath_prev = f"{vector_name}/{year}/{month}/{day}/{hour_prev}/{os.path.basename(filename)}"

sunholo/invoke/__init__.py ADDED Viewed

@@ -0,0 +1 @@
+from .invoke_vac_utils import invoke_vac, invoke_vac_qa

sunholo 0.72.0__py3-none-any.whl → 0.73.3__py3-none-any.whl

sunholo 0.72.0__py3-none-any.whl → 0.73.3__py3-none-any.whl