zrb 1.0.0b2__py3-none-any.whl → 1.0.0b4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. zrb/__main__.py +3 -0
  2. zrb/builtin/llm/llm_chat.py +85 -5
  3. zrb/builtin/llm/previous-session.js +13 -0
  4. zrb/builtin/llm/tool/api.py +29 -0
  5. zrb/builtin/llm/tool/cli.py +1 -1
  6. zrb/builtin/llm/tool/rag.py +108 -145
  7. zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/_zrb/entity/template/client_method.py +6 -6
  8. zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/_zrb/entity/template/gateway_subroute.py +3 -1
  9. zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/common/base_db_repository.py +88 -44
  10. zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/config.py +12 -0
  11. zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/module/auth/client/auth_client.py +28 -22
  12. zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/module/auth/migration/versions/3093c7336477_add_auth_tables.py +6 -6
  13. zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/module/auth/service/role/repository/role_db_repository.py +43 -29
  14. zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/module/auth/service/role/repository/role_repository.py +8 -0
  15. zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/module/auth/service/role/role_service.py +46 -14
  16. zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/module/auth/service/user/repository/user_db_repository.py +158 -20
  17. zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/module/auth/service/user/repository/user_repository.py +29 -0
  18. zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/module/auth/service/user/user_service.py +36 -14
  19. zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/module/gateway/subroute/auth.py +14 -14
  20. zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/schema/permission.py +1 -1
  21. zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/schema/role.py +34 -6
  22. zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/schema/session.py +2 -6
  23. zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/schema/user.py +41 -2
  24. zrb/builtin/todo.py +1 -0
  25. zrb/config.py +23 -4
  26. zrb/input/any_input.py +5 -0
  27. zrb/input/base_input.py +6 -0
  28. zrb/input/bool_input.py +2 -0
  29. zrb/input/float_input.py +2 -0
  30. zrb/input/int_input.py +2 -0
  31. zrb/input/option_input.py +2 -0
  32. zrb/input/password_input.py +2 -0
  33. zrb/input/text_input.py +2 -0
  34. zrb/runner/common_util.py +1 -1
  35. zrb/runner/web_route/error_page/show_error_page.py +2 -1
  36. zrb/runner/web_route/static/resources/session/current-session.js +4 -2
  37. zrb/runner/web_route/static/resources/session/event.js +8 -2
  38. zrb/runner/web_route/task_session_api_route.py +48 -3
  39. zrb/task/base_task.py +14 -13
  40. zrb/task/llm_task.py +214 -84
  41. zrb/util/llm/tool.py +3 -7
  42. {zrb-1.0.0b2.dist-info → zrb-1.0.0b4.dist-info}/METADATA +2 -1
  43. {zrb-1.0.0b2.dist-info → zrb-1.0.0b4.dist-info}/RECORD +45 -43
  44. {zrb-1.0.0b2.dist-info → zrb-1.0.0b4.dist-info}/WHEEL +0 -0
  45. {zrb-1.0.0b2.dist-info → zrb-1.0.0b4.dist-info}/entry_points.txt +0 -0
zrb/__main__.py CHANGED
@@ -16,5 +16,8 @@ def serve_cli():
16
16
  cli.run(sys.argv[1:])
17
17
  except KeyboardInterrupt:
18
18
  print(stylize_warning("\nStopped"), file=sys.stderr)
19
+ except RuntimeError as e:
20
+ if f"{e}".lower() != "event loop is closed":
21
+ raise e
19
22
  except NodeNotFoundError as e:
20
23
  print(stylize_error(f"{e}"), file=sys.stderr)
@@ -1,16 +1,76 @@
1
+ import json
2
+ import os
3
+ from typing import Any
4
+
1
5
  from zrb.builtin.group import llm_group
6
+ from zrb.builtin.llm.tool.api import get_current_location, get_current_weather
2
7
  from zrb.builtin.llm.tool.cli import run_shell_command
3
8
  from zrb.builtin.llm.tool.web import open_web_route, query_internet
4
9
  from zrb.config import (
10
+ LLM_ALLOW_ACCESS_INTERNET,
5
11
  LLM_ALLOW_ACCESS_SHELL,
6
- LLM_ALLOW_ACCESS_WEB,
7
- LLM_HISTORY_FILE,
12
+ LLM_HISTORY_DIR,
8
13
  LLM_MODEL,
9
14
  LLM_SYSTEM_PROMPT,
10
15
  )
16
+ from zrb.context.any_shared_context import AnySharedContext
17
+ from zrb.input.bool_input import BoolInput
11
18
  from zrb.input.str_input import StrInput
12
19
  from zrb.input.text_input import TextInput
13
20
  from zrb.task.llm_task import LLMTask
21
+ from zrb.util.file import read_file, write_file
22
+ from zrb.util.string.conversion import to_pascal_case
23
+
24
+
25
+ class PreviousSessionInput(StrInput):
26
+
27
+ def to_html(self, ctx: AnySharedContext) -> str:
28
+ name = self.name
29
+ description = self.description
30
+ default = self.get_default_str(ctx)
31
+ script = read_file(
32
+ file_path=os.path.join(os.path.dirname(__file__), "previous-session.js"),
33
+ replace_map={
34
+ "CURRENT_INPUT_NAME": name,
35
+ "CurrentPascalInputName": to_pascal_case(name),
36
+ },
37
+ )
38
+ return "\n".join(
39
+ [
40
+ f'<input name="{name}" placeholder="{description}" value="{default}" />',
41
+ f"<script>{script}</script>",
42
+ ]
43
+ )
44
+
45
+
46
+ def _read_chat_conversation(ctx: AnySharedContext) -> list[dict[str, Any]]:
47
+ if ctx.input.start_new:
48
+ return []
49
+ previous_session_name = ctx.input.previous_session
50
+ if previous_session_name == "" or previous_session_name is None:
51
+ last_session_file_path = os.path.join(LLM_HISTORY_DIR, "last-session")
52
+ if os.path.isfile(last_session_file_path):
53
+ previous_session_name = read_file(last_session_file_path).strip()
54
+ conversation_file_path = os.path.join(
55
+ LLM_HISTORY_DIR, f"{previous_session_name}.json"
56
+ )
57
+ if not os.path.isfile(conversation_file_path):
58
+ return []
59
+ return json.loads(read_file(conversation_file_path))
60
+
61
+
62
+ def _write_chat_conversation(
63
+ ctx: AnySharedContext, conversations: list[dict[str, Any]]
64
+ ):
65
+ os.makedirs(LLM_HISTORY_DIR, exist_ok=True)
66
+ current_session_name = ctx.session.name
67
+ conversation_file_path = os.path.join(
68
+ LLM_HISTORY_DIR, f"{current_session_name}.json"
69
+ )
70
+ write_file(conversation_file_path, json.dumps(conversations, indent=2))
71
+ last_session_file_path = os.path.join(LLM_HISTORY_DIR, "last-session")
72
+ write_file(last_session_file_path, current_session_name)
73
+
14
74
 
15
75
  llm_chat: LLMTask = llm_group.add_task(
16
76
  LLMTask(
@@ -21,20 +81,38 @@ llm_chat: LLMTask = llm_group.add_task(
21
81
  description="LLM Model",
22
82
  prompt="LLM Model",
23
83
  default_str=LLM_MODEL,
84
+ allow_positional_parsing=False,
24
85
  ),
25
- StrInput(
86
+ TextInput(
26
87
  "system-prompt",
27
88
  description="System prompt",
28
89
  prompt="System prompt",
29
90
  default_str=LLM_SYSTEM_PROMPT,
91
+ allow_positional_parsing=False,
92
+ ),
93
+ BoolInput(
94
+ "start-new",
95
+ description="Start new conversation session",
96
+ prompt="Forget everything and start new conversation session",
97
+ default_str="false",
98
+ allow_positional_parsing=False,
30
99
  ),
31
100
  TextInput("message", description="User message", prompt="Your message"),
101
+ PreviousSessionInput(
102
+ "previous-session",
103
+ description="Previous conversation session",
104
+ prompt="Previous conversation session (can be empty)",
105
+ allow_positional_parsing=False,
106
+ allow_empty=True,
107
+ ),
32
108
  ],
33
- history_file=LLM_HISTORY_FILE,
109
+ conversation_history_reader=_read_chat_conversation,
110
+ conversation_history_writer=_write_chat_conversation,
34
111
  description="Chat with LLM",
35
112
  model="{ctx.input.model}",
36
113
  system_prompt="{ctx.input['system-prompt']}",
37
114
  message="{ctx.input.message}",
115
+ retries=0,
38
116
  ),
39
117
  alias="chat",
40
118
  )
@@ -42,6 +120,8 @@ llm_chat: LLMTask = llm_group.add_task(
42
120
  if LLM_ALLOW_ACCESS_SHELL:
43
121
  llm_chat.add_tool(run_shell_command)
44
122
 
45
- if LLM_ALLOW_ACCESS_WEB:
123
+ if LLM_ALLOW_ACCESS_INTERNET:
46
124
  llm_chat.add_tool(open_web_route)
47
125
  llm_chat.add_tool(query_internet)
126
+ llm_chat.add_tool(get_current_location)
127
+ llm_chat.add_tool(get_current_weather)
@@ -0,0 +1,13 @@
1
+ let hasUpdateCurrentPascalInputName = false;
2
+ document.getElementById("submit-task-form").addEventListener("change", async function(event) {
3
+ const currentInput = event.target;
4
+ if (hasUpdateCurrentPascalInputName || currentInput.name === "CURRENT_INPUT_NAME") {
5
+ return
6
+ }
7
+ const previousSessionInput = submitTaskForm.querySelector('[name="CURRENT_INPUT_NAME"]');
8
+ if (previousSessionInput) {
9
+ const currentSessionName = cfg.SESSION_NAME
10
+ previousSessionInput.value = currentSessionName;
11
+ }
12
+ hasUpdateCurrentPascalInputName = true;
13
+ });
@@ -0,0 +1,29 @@
1
+ import json
2
+ from typing import Annotated, Literal
3
+
4
+ import requests
5
+
6
+
7
+ def get_current_location() -> (
8
+ Annotated[str, "JSON string representing latitude and longitude"]
9
+ ): # noqa
10
+ """Get the user's current location."""
11
+ return json.dumps(requests.get("http://ip-api.com/json?fields=lat,lon").json())
12
+
13
+
14
+ def get_current_weather(
15
+ latitude: float,
16
+ longitude: float,
17
+ temperature_unit: Literal["celsius", "fahrenheit"],
18
+ ) -> str:
19
+ """Get the current weather in a given location."""
20
+ resp = requests.get(
21
+ "https://api.open-meteo.com/v1/forecast",
22
+ params={
23
+ "latitude": latitude,
24
+ "longitude": longitude,
25
+ "temperature_unit": temperature_unit,
26
+ "current_weather": True,
27
+ },
28
+ )
29
+ return json.dumps(resp.json())
@@ -2,7 +2,7 @@ import subprocess
2
2
 
3
3
 
4
4
  def run_shell_command(command: str) -> str:
5
- """Running a shell command"""
5
+ """Running an actual shell command on user's computer."""
6
6
  output = subprocess.check_output(
7
7
  command, shell=True, stderr=subprocess.STDOUT, text=True
8
8
  )
@@ -1,9 +1,10 @@
1
+ import hashlib
1
2
  import json
2
3
  import os
3
4
  import sys
4
- from collections.abc import Callable, Iterable
5
5
 
6
6
  import litellm
7
+ import ulid
7
8
 
8
9
  from zrb.config import (
9
10
  RAG_CHUNK_SIZE,
@@ -13,10 +14,6 @@ from zrb.config import (
13
14
  )
14
15
  from zrb.util.cli.style import stylize_error, stylize_faint
15
16
  from zrb.util.file import read_file
16
- from zrb.util.run import run_async
17
-
18
- Document = str | Callable[[], str]
19
- Documents = Callable[[], Iterable[Document]] | Iterable[Document]
20
17
 
21
18
 
22
19
  def create_rag_from_directory(
@@ -30,86 +27,87 @@ def create_rag_from_directory(
30
27
  overlap: int = RAG_OVERLAP,
31
28
  max_result_count: int = RAG_MAX_RESULT_COUNT,
32
29
  ):
33
- return create_rag(
34
- tool_name=tool_name,
35
- tool_description=tool_description,
36
- documents=get_rag_documents(os.path.expanduser(document_dir_path)),
37
- model=model,
38
- vector_db_path=vector_db_path,
39
- vector_db_collection=vector_db_collection,
40
- reset_db=get_rag_reset_db(
41
- document_dir_path=os.path.expanduser(document_dir_path),
42
- vector_db_path=os.path.expanduser(vector_db_path),
43
- ),
44
- chunk_size=chunk_size,
45
- overlap=overlap,
46
- max_result_count=max_result_count,
47
- )
48
-
49
-
50
- def create_rag(
51
- tool_name: str,
52
- tool_description: str,
53
- documents: Documents = [],
54
- model: str = RAG_EMBEDDING_MODEL,
55
- vector_db_path: str = "./chroma",
56
- vector_db_collection: str = "documents",
57
- reset_db: Callable[[], bool] | bool = False,
58
- chunk_size: int = RAG_CHUNK_SIZE,
59
- overlap: int = RAG_OVERLAP,
60
- max_result_count: int = RAG_MAX_RESULT_COUNT,
61
- ) -> Callable[[str], str]:
62
30
  async def retrieve(query: str) -> str:
63
- import chromadb
31
+ from chromadb import PersistentClient
64
32
  from chromadb.config import Settings
65
33
 
66
- is_db_exist = os.path.isdir(vector_db_path)
67
- client = chromadb.PersistentClient(
34
+ client = PersistentClient(
68
35
  path=vector_db_path, settings=Settings(allow_reset=True)
69
36
  )
70
- should_reset_db = (
71
- await run_async(reset_db()) if callable(reset_db) else reset_db
72
- )
73
- if (not is_db_exist) or should_reset_db:
74
- client.reset()
75
- collection = client.get_or_create_collection(vector_db_collection)
76
- chunk_index = 0
77
- print(stylize_faint("Scanning documents"), file=sys.stderr)
78
- docs = await run_async(documents()) if callable(documents) else documents
79
- for document in docs:
80
- if callable(document):
81
- try:
82
- document = await run_async(document())
83
- except Exception as error:
84
- print(stylize_error(f"Error: {error}"), file=sys.stderr)
85
- continue
86
- for i in range(0, len(document), chunk_size - overlap):
87
- chunk = document[i : i + chunk_size]
88
- if len(chunk) > 0:
89
- print(
90
- stylize_faint(f"Vectorize chunk {chunk_index}"),
91
- file=sys.stderr,
92
- )
93
- response = await litellm.aembedding(model=model, input=[chunk])
94
- vector = response["data"][0]["embedding"]
95
- print(
96
- stylize_faint(f"Adding chunk {chunk_index} to db"),
97
- file=sys.stderr,
98
- )
99
- collection.upsert(
100
- ids=[f"id{chunk_index}"],
101
- embeddings=[vector],
102
- documents=[chunk],
103
- )
104
- chunk_index += 1
105
37
  collection = client.get_or_create_collection(vector_db_collection)
106
- # Generate embedding for the query
107
- print(stylize_faint("Vectorize query"), file=sys.stderr)
38
+
39
+ # Track file changes using a hash-based approach
40
+ hash_file_path = os.path.join(vector_db_path, "file_hashes.json")
41
+ previous_hashes = _load_hashes(hash_file_path)
42
+ current_hashes = {}
43
+
44
+ updated_files = []
45
+
46
+ for root, _, files in os.walk(document_dir_path):
47
+ for file in files:
48
+ file_path = os.path.join(root, file)
49
+ file_hash = _compute_file_hash(file_path)
50
+ relative_path = os.path.relpath(file_path, document_dir_path)
51
+ current_hashes[relative_path] = file_hash
52
+
53
+ if previous_hashes.get(relative_path) != file_hash:
54
+ updated_files.append(file_path)
55
+
56
+ if updated_files:
57
+ print(
58
+ stylize_faint(f"Updating {len(updated_files)} changed files"),
59
+ file=sys.stderr,
60
+ )
61
+
62
+ for file_path in updated_files:
63
+ try:
64
+ relative_path = os.path.relpath(file_path, document_dir_path)
65
+ collection.delete(where={"file_path": relative_path})
66
+ content = _read_file_content(file_path)
67
+ file_id = ulid.new().str
68
+ for i in range(0, len(content), chunk_size - overlap):
69
+ chunk = content[i : i + chunk_size]
70
+ if chunk:
71
+ chunk_id = ulid.new().str
72
+ print(
73
+ stylize_faint(
74
+ f"Vectorizing {relative_path} chunk {chunk_id}"
75
+ ),
76
+ file=sys.stderr,
77
+ )
78
+ response = await litellm.aembedding(
79
+ model=model, input=[chunk]
80
+ )
81
+ vector = response["data"][0]["embedding"]
82
+ collection.upsert(
83
+ ids=[chunk_id],
84
+ embeddings=[vector],
85
+ documents=[chunk],
86
+ metadatas={
87
+ "file_path": relative_path,
88
+ "file_id": file_id,
89
+ },
90
+ )
91
+ except Exception as e:
92
+ print(
93
+ stylize_error(f"Error processing {file_path}: {e}"),
94
+ file=sys.stderr,
95
+ )
96
+
97
+ _save_hashes(hash_file_path, current_hashes)
98
+ else:
99
+ print(
100
+ stylize_faint("No changes detected. Skipping database update."),
101
+ file=sys.stderr,
102
+ )
103
+
104
+ print(stylize_faint("Vectorizing query"), file=sys.stderr)
108
105
  query_response = await litellm.aembedding(model=model, input=[query])
109
- print(stylize_faint("Search documents"), file=sys.stderr)
110
- # Search for the top_k most similar documents
106
+ query_vector = query_response["data"][0]["embedding"]
107
+
108
+ print(stylize_faint("Searching documents"), file=sys.stderr)
111
109
  results = collection.query(
112
- query_embeddings=query_response["data"][0]["embedding"],
110
+ query_embeddings=query_vector,
113
111
  n_results=max_result_count,
114
112
  )
115
113
  return json.dumps(results)
@@ -119,71 +117,36 @@ def create_rag(
119
117
  return retrieve
120
118
 
121
119
 
122
- def get_rag_documents(document_dir_path: str) -> Callable[[], list[Callable[[], str]]]:
123
- def get_documents() -> list[Callable[[], str]]:
124
- # Walk through the directory
125
- readers = []
126
- for root, _, files in os.walk(document_dir_path):
127
- for file in files:
128
- file_path = os.path.join(root, file)
129
- if file_path.lower().endswith(".pdf"):
130
- readers.append(_get_pdf_reader(file_path))
131
- continue
132
- readers.append(_get_text_reader(file_path))
133
- return readers
134
-
135
- return get_documents
136
-
137
-
138
- def _get_text_reader(file_path: str):
139
- def read():
140
- print(stylize_faint(f"Start reading {file_path}"), file=sys.stderr)
141
- content = read_file(file_path)
142
- print(stylize_faint(f"Complete reading {file_path}"), file=sys.stderr)
143
- return content
144
-
145
- return read
146
-
147
-
148
- def _get_pdf_reader(file_path):
149
- def read():
150
- import pdfplumber
151
-
152
- print(stylize_faint(f"Start reading {file_path}"), file=sys.stderr)
153
- contents = []
154
- with pdfplumber.open(file_path) as pdf:
155
- for page in pdf.pages:
156
- contents.append(page.extract_text())
157
- print(stylize_faint(f"Complete reading {file_path}"), file=sys.stderr)
158
- return "\n".join(contents)
159
-
160
- return read
161
-
162
-
163
- def get_rag_reset_db(
164
- document_dir_path: str, vector_db_path: str = "./chroma"
165
- ) -> Callable[[], bool]:
166
- def should_reset_db() -> bool:
167
- document_exist = os.path.isdir(document_dir_path)
168
- if not document_exist:
169
- raise ValueError(f"Document directory not exists: {document_dir_path}")
170
- vector_db_exist = os.path.isdir(vector_db_path)
171
- if not vector_db_exist:
172
- return True
173
- document_mtime = _get_most_recent_mtime(document_dir_path)
174
- vector_db_mtime = _get_most_recent_mtime(vector_db_path)
175
- return document_mtime > vector_db_mtime
176
-
177
- return should_reset_db
178
-
179
-
180
- def _get_most_recent_mtime(directory):
181
- most_recent_mtime = 0
182
- for root, dirs, files in os.walk(directory):
183
- # Check mtime for directories
184
- for name in dirs + files:
185
- file_path = os.path.join(root, name)
186
- mtime = os.path.getmtime(file_path)
187
- if mtime > most_recent_mtime:
188
- most_recent_mtime = mtime
189
- return most_recent_mtime
120
+ def _compute_file_hash(file_path: str) -> str:
121
+ hash_md5 = hashlib.md5()
122
+ with open(file_path, "rb") as f:
123
+ for chunk in iter(lambda: f.read(4096), b""):
124
+ hash_md5.update(chunk)
125
+ return hash_md5.hexdigest()
126
+
127
+
128
+ def _read_file_content(file_path: str) -> str:
129
+ if file_path.lower().endswith(".pdf"):
130
+ return _read_pdf(file_path)
131
+ return read_file(file_path)
132
+
133
+
134
+ def _read_pdf(file_path: str) -> str:
135
+ import pdfplumber
136
+
137
+ with pdfplumber.open(file_path) as pdf:
138
+ return "\n".join(
139
+ page.extract_text() for page in pdf.pages if page.extract_text()
140
+ )
141
+
142
+
143
+ def _load_hashes(file_path: str) -> dict:
144
+ if os.path.exists(file_path):
145
+ with open(file_path, "r") as f:
146
+ return json.load(f)
147
+ return {}
148
+
149
+
150
+ def _save_hashes(file_path: str, hashes: dict):
151
+ with open(file_path, "w") as f:
152
+ json.dump(hashes, f)
@@ -18,17 +18,17 @@ async def get_my_entities(
18
18
 
19
19
 
20
20
  @abstractmethod
21
- async def create_my_entity(self, data: MyEntityCreateWithAudit) -> MyEntityResponse:
22
- """Create a new my entities"""
23
-
24
-
25
- @abstractmethod
26
- async def create_my_entity(
21
+ async def create_my_entity_bulk(
27
22
  self, data: list[MyEntityCreateWithAudit]
28
23
  ) -> list[MyEntityResponse]:
29
24
  """Create new my entities"""
30
25
 
31
26
 
27
+ @abstractmethod
28
+ async def create_my_entity(self, data: MyEntityCreateWithAudit) -> MyEntityResponse:
29
+ """Create a new my entities"""
30
+
31
+
32
32
  @abstractmethod
33
33
  async def update_my_entity_bulk(
34
34
  self, my_entity_ids: list[str], data: MyEntityUpdateWithAudit
@@ -51,7 +51,9 @@ async def update_my_entity_bulk(my_entity_ids: list[str], data: MyEntityUpdate):
51
51
  response_model=MyEntityResponse,
52
52
  )
53
53
  async def update_my_entity(my_entity_id: str, data: MyEntityUpdate):
54
- return await my_module_client.update_my_entity(data.with_audit(updated_by="system"))
54
+ return await my_module_client.update_my_entity(
55
+ my_entity_id, data.with_audit(updated_by="system")
56
+ )
55
57
 
56
58
 
57
59
  @app.delete(