ws-bom-robot-app 0.0.78__py3-none-any.whl → 0.0.80__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,6 +14,8 @@ class Settings(BaseSettings):
14
14
  robot_data_db_folder_out: str = 'out'
15
15
  robot_data_db_folder_store: str = 'store'
16
16
  robot_data_db_retention_days: float = 60
17
+ robot_data_attachment_folder: str = 'attachment'
18
+ robot_data_attachment_retention_days: float = 1
17
19
  robot_loader_max_threads: int = 1
18
20
  robot_task_max_total_parallelism: int = 2 * (os.cpu_count() or 1)
19
21
  robot_task_retention_days: float = 1
@@ -30,7 +32,7 @@ class Settings(BaseSettings):
30
32
  WATSONX_URL: str = ''
31
33
  WATSONX_APIKEY: str = ''
32
34
  WATSONX_PROJECTID: str = ''
33
- NEBULY_API_URL: str =''
35
+ NEBULY_API_URL: str ='https://backend.nebuly.com/'
34
36
  GOOGLE_APPLICATION_CREDENTIALS: str = '' # path to google credentials iam file, e.d. ./.secrets/google-credentials.json
35
37
  model_config = ConfigDict(
36
38
  env_file='./.env',
@@ -8,7 +8,7 @@ from apscheduler.triggers.date import DateTrigger
8
8
  from fastapi import APIRouter
9
9
  from datetime import datetime
10
10
  from ws_bom_robot_app.task_manager import task_manager
11
- from ws_bom_robot_app.llm.utils.kb import kb_cleanup_data_file
11
+ from ws_bom_robot_app.llm.utils.cleanup import kb_cleanup_data_file, chat_cleanup_attachment
12
12
  from ws_bom_robot_app.util import _log
13
13
  from ws_bom_robot_app.config import config
14
14
 
@@ -57,7 +57,8 @@ class Job:
57
57
  class CronManager:
58
58
  _list_default = [
59
59
  Job('cleanup-task',task_manager.cleanup_task, interval=5 * 60),
60
- Job('cleanup-data',kb_cleanup_data_file, interval=180 * 60),
60
+ Job('cleanup-kb-data',kb_cleanup_data_file, interval=180 * 60),
61
+ Job('cleanup-chat-attachment',chat_cleanup_attachment, interval=120 * 60),
61
62
  ]
62
63
  def __get_jobstore_strategy(self) -> JobstoreStrategy:
63
64
  if True or config.runtime_options().is_multi_process:
@@ -139,22 +140,22 @@ class CronManager:
139
140
 
140
141
  def execute_recurring_jobs(self):
141
142
  for job in self.scheduler.get_jobs():
142
- if job.interval:
143
- job.job_func()
143
+ if job.trigger.interval:
144
+ job.func()
144
145
 
145
146
  def pause_recurring_jobs(self):
146
147
  for job in self.scheduler.get_jobs():
147
- if job.interval:
148
+ if job.trigger.interval:
148
149
  self.pause_job(job.id)
149
150
 
150
151
  def resume_recurring_jobs(self):
151
152
  for job in self.scheduler.get_jobs():
152
- if job.interval:
153
+ if job.trigger.interval:
153
154
  self.resume_job(job.id)
154
155
 
155
156
  def remove_recurring_jobs(self):
156
157
  for job in self.scheduler.get_jobs():
157
- if job.interval:
158
+ if job.trigger.interval:
158
159
  self.remove_job(job.id)
159
160
 
160
161
  def clear(self):
@@ -39,6 +39,7 @@ def _parse_formatted_message(message: str) -> str:
39
39
  except:
40
40
  result = message
41
41
  return result
42
+
42
43
  async def __stream(rq: StreamRequest, ctx: Request, queue: Queue, formatted: bool = True) -> None:
43
44
  #os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
44
45
 
@@ -47,11 +48,21 @@ async def __stream(rq: StreamRequest, ctx: Request, queue: Queue, formatted: boo
47
48
  for tool in rq.app_tools:
48
49
  tool.thread_id = rq.thread_id
49
50
 
51
+ #llm
52
+ __llm: LlmInterface = rq.get_llm()
53
+
50
54
  #chat history
51
55
  chat_history: list[BaseMessage] = []
52
56
  for message in rq.messages:
53
57
  if message.role in ["human","user"]:
54
- chat_history.append(HumanMessage(content=message.content))
58
+ _content = message.content
59
+ # multimodal content parsing
60
+ if isinstance(_content, list):
61
+ try:
62
+ _content = await __llm.format_multimodal_content(_content)
63
+ except Exception as e:
64
+ logging.warning(f"Error parsing multimodal content {_content[:100]}: {e}")
65
+ chat_history.append(HumanMessage(content=_content))
55
66
  elif message.role in ["ai","assistant"]:
56
67
  message_content = ""
57
68
  if formatted:
@@ -78,8 +89,6 @@ async def __stream(rq: StreamRequest, ctx: Request, queue: Queue, formatted: boo
78
89
  if message_content:
79
90
  chat_history.append(AIMessage(content=message_content))
80
91
 
81
- #llm
82
- __llm: LlmInterface = rq.get_llm()
83
92
 
84
93
  #agent handler
85
94
  if formatted:
@@ -11,6 +11,39 @@ import os, shutil, uuid
11
11
  from ws_bom_robot_app.config import Settings, config
12
12
 
13
13
  class LlmMessage(BaseModel):
14
+ """
15
+ 💬 multimodal chat
16
+
17
+ The multimodal message allows users to interact with the application using both text and media files.
18
 + `robot` accepts multimodal input in a uniform way, regardless of the llm provider used.
19
+
20
+ - simple message
21
+
22
+ ```json
23
+ {
24
+ "role": "user",
25
+ "content": "What is the capital of France?"
26
+ }
27
+ ```
28
+
29
+ - multimodal message
30
+
31
+ ```jsonc
32
+ {
33
+ "role": "user",
34
+ "content": [
35
 + { "type": "text", "text": "Read carefully all the attachments, analyze the content and provide a summary for each one:" },
36
+ { "type": "image", "url": "https://www.example.com/image/foo.jpg" },
37
+ { "type": "file", "url": "https://www.example.com/pdf/bar.pdf" },
38
 + { "type": "file", "url": "data:text/plain;base64,CiAgICAgIF9fX19fCiAgICAgLyAgIC..." }, // base64 encoded file
39
 + { "type": "media", "mime_type": "text/plain", "data": "CiAgICAgIF9fX19fCiAgICAgLyAgIC..." } // google/gemini specific input format
40
+ ]
41
+ }
42
+ ```
43
+
44
+ > 💡 `url` can be a remote url or a base64 representation of the file: [rfc 2397](https://datatracker.ietf.org/doc/html/rfc2397).
45
  The llm/model-specific input format can also be used.
46
+ """
14
47
  role: str
15
48
  content: Union[str, list]
16
49
 
@@ -145,16 +145,20 @@ class NebulyHandler(AsyncCallbackHandler):
145
145
  return payload
146
146
 
147
147
  def __parse_multimodal_input(self, input: list[dict]) -> str:
148
- # Parse the multimodal input and return a string representation
149
- # This is a placeholder implementation, you can customize it as needed
150
- parsed_input = ""
151
- for item in input:
152
- if item.get("type") == "text":
153
- parsed_input += item.get("text", "")
154
- elif item.get("type") == "image_url":
155
- parsed_input += " <image>"
156
- # print(parsed_input)
157
- return parsed_input
148
+ """Parse multimodal input and return a string representation."""
149
+ type_mapping = {
150
+ "text": lambda item: item.get("text", ""),
151
+ "image": lambda _: " <image>",
152
+ "image_url": lambda _: " <image>",
153
+ "file": lambda _: " <file>",
154
+ "media": lambda _: " <file>",
155
+ "document": lambda _: " <file>",
156
+ }
157
+
158
+ return "".join(
159
+ type_mapping.get(item.get("type", ""), lambda item: f" <{item.get('type', '')}>")
160
+ (item) for item in input
161
+ )
158
162
 
159
163
  def __parse_multimodal_history(self, messages: list[dict]) -> list[dict]:
160
164
  # Parse the multimodal history and return a list of dictionaries
@@ -3,6 +3,7 @@ from langchain_core.embeddings import Embeddings
3
3
  from langchain_core.language_models import BaseChatModel
4
4
  from pydantic import BaseModel, ConfigDict, Field
5
5
  import os
6
+ from ws_bom_robot_app.llm.utils.download import Base64File
6
7
 
7
8
  class LlmConfig(BaseModel):
8
9
  api_url: Optional[str] = None
@@ -35,6 +36,30 @@ class LlmInterface:
35
36
  def get_parser(self):
36
37
  from langchain.agents.output_parsers.openai_tools import OpenAIToolsAgentOutputParser
37
38
  return OpenAIToolsAgentOutputParser()
39
+ async def _format_multimodal_image_message(self, message: dict) -> dict:
40
+ return {
41
+ "type": "image_url",
42
+ "image_url": {
43
+ "url": message.get("url")
44
+ }
45
+ }
46
+ async def _format_multimodal_file_message(self, message: dict, file: Base64File = None) -> dict:
47
+ _file = file or await Base64File.from_url(message.get("url"))
48
+ return {"type": "text", "text": f"Here's a file attachment named `{_file.name}` of type `{_file.mime_type}` in base64: `{_file.base64_content}`"}
49
+ async def format_multimodal_content(self, content: list) -> list:
50
+ _content = []
51
+ for message in content:
52
+ if isinstance(message, dict):
53
+ if message.get("type") == "image" and "url" in message:
54
+ _content.append(await self._format_multimodal_image_message(message))
55
+ elif message.get("type") == "file" and "url" in message:
56
+ _content.append(await self._format_multimodal_file_message(message))
57
+ else:
58
+ # pass through text or other formats unchanged
59
+ _content.append(message)
60
+ else:
61
+ _content.append(message)
62
+ return _content
38
63
 
39
64
  class Anthropic(LlmInterface):
40
65
  def get_llm(self):
@@ -62,6 +87,16 @@ class Anthropic(LlmInterface):
62
87
  response = client.models.list()
63
88
  return response.data
64
89
 
90
+ async def _format_multimodal_image_message(self, message: dict) -> dict:
91
+ file = await Base64File.from_url(message.get("url"))
92
+ return { "type": "image_url", "image_url": { "url": file.base64_url }}
93
+ async def _format_multimodal_file_message(self, message: dict, file: Base64File = None) -> dict:
94
+ _file = file or await Base64File.from_url(message.get("url"))
95
+ if _file.extension in ["pdf"]:
96
+ return {"type": "document", "source": {"type": "base64", "media_type": _file.mime_type, "data": _file.base64_content}}
97
+ else:
98
+ return await super()._format_multimodal_file_message(message, _file)
99
+
65
100
  class OpenAI(LlmInterface):
66
101
  def __init__(self, config: LlmConfig):
67
102
  super().__init__(config)
@@ -84,6 +119,13 @@ class OpenAI(LlmInterface):
84
119
  response = openai.models.list()
85
120
  return response.data
86
121
 
122
+ async def _format_multimodal_file_message(self, message: dict, file: Base64File = None) -> dict:
123
+ _file = file or await Base64File.from_url(message.get("url"))
124
+ if _file.extension in ["pdf"]:
125
+ return {"type": "file", "file": { "source_type": "base64", "file_data": _file.base64_url, "mime_type": _file.mime_type, "filename": _file.name}}
126
+ else:
127
+ return await super()._format_multimodal_file_message(message, _file)
128
+
87
129
  class DeepSeek(LlmInterface):
88
130
  def get_llm(self):
89
131
  from langchain_openai import ChatOpenAI
@@ -104,33 +146,48 @@ class DeepSeek(LlmInterface):
104
146
  response = openai.models.list()
105
147
  return response.data
106
148
 
149
+ async def _format_multimodal_image_message(self, message: dict) -> dict:
150
+ print(f"{DeepSeek.__name__} does not support image messages")
151
+ return None
152
+
153
+ async def _format_multimodal_file_message(self, message: dict, file: Base64File = None) -> dict:
154
+ print(f"{DeepSeek.__name__} does not support file messages")
155
+ return None
156
+
107
157
  class Google(LlmInterface):
108
- def get_llm(self):
109
- from langchain_google_genai.chat_models import ChatGoogleGenerativeAI
110
- return ChatGoogleGenerativeAI(
111
- model=self.config.model,
112
- google_api_key=self.config.api_key or os.getenv("GOOGLE_API_KEY"),
113
- temperature=self.config.temperature,
114
- disable_streaming=False,
115
- )
116
-
117
- def get_embeddings(self):
118
- from langchain_google_genai.embeddings import GoogleGenerativeAIEmbeddings
119
- return GoogleGenerativeAIEmbeddings(
120
- google_api_key=self.config.api_key or os.getenv("GOOGLE_API_KEY"),
121
- model="models/text-embedding-004")
122
-
123
- def get_models(self):
124
- import google.generativeai as genai
125
- genai.configure(api_key=self.config.api_key or os.getenv("GOOGLE_API_KEY"))
126
- response = genai.list_models()
127
- return [{
128
- "id": model.name,
129
- "name": model.display_name,
130
- "description": model.description,
131
- "input_token_limit": model.input_token_limit,
132
- "output_token_limit": model.output_token_limit
133
- } for model in response if "gemini" in model.name.lower()]
158
+ def get_llm(self):
159
+ from langchain_google_genai.chat_models import ChatGoogleGenerativeAI
160
+ return ChatGoogleGenerativeAI(
161
+ model=self.config.model,
162
+ google_api_key=self.config.api_key or os.getenv("GOOGLE_API_KEY"),
163
+ temperature=self.config.temperature,
164
+ disable_streaming=False,
165
+ )
166
+
167
+ def get_embeddings(self):
168
+ from langchain_google_genai.embeddings import GoogleGenerativeAIEmbeddings
169
+ return GoogleGenerativeAIEmbeddings(
170
+ google_api_key=self.config.api_key or os.getenv("GOOGLE_API_KEY"),
171
+ model="models/text-embedding-004")
172
+
173
+ def get_models(self):
174
+ import google.generativeai as genai
175
+ genai.configure(api_key=self.config.api_key or os.getenv("GOOGLE_API_KEY"))
176
+ response = genai.list_models()
177
+ return [{
178
+ "id": model.name,
179
+ "name": model.display_name,
180
+ "description": model.description,
181
+ "input_token_limit": model.input_token_limit,
182
+ "output_token_limit": model.output_token_limit
183
+ } for model in response if "gemini" in model.name.lower()]
184
+
185
+ async def _format_multimodal_file_message(self, message: dict, file: Base64File = None) -> dict:
186
+ _file = file or await Base64File.from_url(message.get("url"))
187
+ if _file.extension in ["pdf", "csv"]:
188
+ return {"type": "media", "mime_type": _file.mime_type, "data": _file.base64_content }
189
+ else:
190
+ return await super()._format_multimodal_file_message(message, _file)
134
191
 
135
192
  class Gvertex(LlmInterface):
136
193
  def get_llm(self):
@@ -168,6 +225,13 @@ class Gvertex(LlmInterface):
168
225
  finally:
169
226
  return _models
170
227
 
228
+ async def _format_multimodal_file_message(self, message: dict, file: Base64File = None) -> dict:
229
+ _file = file or await Base64File.from_url(message.get("url"))
230
+ if _file.extension in ["pdf", "csv"]:
231
+ return {"type": "media", "mime_type": _file.mime_type, "data": _file.base64_content }
232
+ else:
233
+ return await super()._format_multimodal_file_message(message, _file)
234
+
171
235
  class Groq(LlmInterface):
172
236
  def get_llm(self):
173
237
  from langchain_groq import ChatGroq
@@ -286,6 +350,10 @@ class Ollama(LlmInterface):
286
350
  "details": model['details']
287
351
  } for model in models]
288
352
 
353
+ async def _format_multimodal_image_message(self, message: dict) -> dict:
354
+ file = await Base64File.from_url(message.get("url"))
355
+ return { "type": "image_url", "image_url": { "url": file.base64_url }}
356
+
289
357
  class LlmManager:
290
358
 
291
359
  #class variables (static)
@@ -0,0 +1,74 @@
1
+ import os, logging
2
+ from ws_bom_robot_app.config import config
3
+ from datetime import datetime, timedelta
4
+
5
+ def _cleanup_data_file(folders: list[str], retention: float) -> dict:
6
+ """
7
+ clean up old data files in the specified folder
8
+
9
+ Returns:
10
+ - Dictionary with cleanup statistics
11
+ """
12
+ _deleted_files = []
13
+ _deleted_dirs = []
14
+ _freed_space = 0
15
+
16
+ for folder in folders:
17
+ if not os.path.exists(folder):
18
+ logging.warning(f"Folder does not exist: {folder}")
19
+ continue
20
+
21
+ # delete old files
22
+ for root, dirs, files in os.walk(folder, topdown=False):
23
+ for file in files:
24
+ file_path = os.path.join(root, file)
25
+ try:
26
+ file_stat = os.stat(file_path)
27
+ file_creation_time = datetime.fromtimestamp(file_stat.st_mtime)
28
+ if file_creation_time < datetime.now() - timedelta(days=retention):
29
+ _freed_space += file_stat.st_size
30
+ os.remove(file_path)
31
+ _deleted_files.append(file_path)
32
+ except (OSError, IOError) as e:
33
+ logging.error(f"Error deleting file {file_path}: {e}")
34
+
35
+ # clean up empty directories (bottom-up)
36
+ for root, dirs, files in os.walk(folder, topdown=False):
37
+ # skip the root folder itself
38
+ if root == folder:
39
+ continue
40
+ try:
41
+ # check if directory is empty
42
+ if not os.listdir(root):
43
+ os.rmdir(root)
44
+ _deleted_dirs.append(root)
45
+ except OSError as e:
46
+ logging.debug(f"Could not remove directory {root}: {e}")
47
+ logging.info(f"Deleted {len(_deleted_files)} files; Freed space: {_freed_space / (1024 * 1024):.2f} MB")
48
+
49
+ return {
50
+ "deleted_files_count": len(_deleted_files),
51
+ "deleted_dirs_count": len(_deleted_dirs),
52
+ "freed_space_mb": _freed_space / (1024 * 1024)
53
+ }
54
+
55
+ def kb_cleanup_data_file() -> dict:
56
+ """
57
+ clean up vector db data files
58
+ """
59
+
60
+ folders = [
61
+ os.path.join(config.robot_data_folder, config.robot_data_db_folder, config.robot_data_db_folder_out),
62
+ os.path.join(config.robot_data_folder, config.robot_data_db_folder, config.robot_data_db_folder_store),
63
+ os.path.join(config.robot_data_folder, config.robot_data_db_folder, config.robot_data_db_folder_src)
64
+ ]
65
+ return _cleanup_data_file(folders, config.robot_data_db_retention_days)
66
+
67
+ def chat_cleanup_attachment() -> dict:
68
+ """
69
+ clean up chat attachment files
70
+ """
71
+ folders = [
72
+ os.path.join(config.robot_data_folder, config.robot_data_attachment_folder)
73
+ ]
74
+ return _cleanup_data_file(folders, config.robot_data_attachment_retention_days)
@@ -1,6 +1,12 @@
1
1
  from typing import List,Optional
2
- import os, logging, aiohttp, asyncio
2
+ import os, logging, aiohttp, asyncio, hashlib, json
3
+ import uuid
4
+ from pydantic import BaseModel
5
+ import base64, requests, mimetypes
6
+ from urllib.parse import urlparse
3
7
  from tqdm.asyncio import tqdm
8
+ from ws_bom_robot_app.config import config
9
+ import aiofiles
4
10
 
5
11
  async def download_files(urls: List[str], destination_folder: str, authorization: str = None):
6
12
  tasks = [download_file(file, os.path.join(destination_folder, os.path.basename(file)), authorization=authorization) for file in urls]
@@ -77,3 +83,103 @@ async def download_file(url: str, destination: str, chunk_size: int = 8192, auth
77
83
  logging.info(f"Cleaned up incomplete download: {destination}")
78
84
  except OSError:
79
85
  pass
86
+
87
 + # ensure attachment folder exists
88
+ os.makedirs(os.path.join(config.robot_data_folder, config.robot_data_attachment_folder), exist_ok=True)
89
+ class Base64File(BaseModel):
90
+ """Base64 encoded file representation"""
91
+ url: str
92
+ base64_url: str
93
+ base64_content: str
94
+ name: str
95
+ extension: str
96
+ mime_type: str
97
+
98
+ @staticmethod
99
+ def _is_base64_data_uri(url: str) -> bool:
100
+ """Check if URL is already a base64 data URI"""
101
+ return (isinstance(url, str) and
102
+ url.startswith('data:') and
103
+ ';base64,' in url and
104
+ len(url.split(',')) == 2)
105
+
106
+ async def from_url(url: str) -> "Base64File":
107
+ """Download file and return as base64 data URI"""
108
+ def _cache_file(url: str) -> str:
109
+ _hash = hashlib.md5(url.encode()).hexdigest()
110
+ return os.path.join(config.robot_data_folder, config.robot_data_attachment_folder, f"{_hash}.json")
111
+ async def from_cache(url: str) -> "Base64File":
112
+ """Check if file is already downloaded and return data"""
113
+ _file = _cache_file(url)
114
+ if os.path.exists(_file):
115
+ try:
116
+ async with aiofiles.open(_file, 'rb') as f:
117
+ content = await f.read()
118
+ return Base64File(**json.loads(content))
119
+ except Exception as e:
120
+ logging.error(f"Error reading cache file {_file}: {e}")
121
+ return None
122
+ return None
123
+ async def to_cache(file: "Base64File", url: str) -> None:
124
+ """Save file to cache"""
125
+ _file = _cache_file(url)
126
+ try:
127
+ async with aiofiles.open(_file, 'wb') as f:
128
+ await f.write(file.model_dump_json().encode('utf-8'))
129
+ except Exception as e:
130
+ logging.error(f"Error writing cache file {_file}: {e}")
131
+
132
+ # special case: base64 data URI
133
+ if Base64File._is_base64_data_uri(url):
134
+ mime_type = url.split(';')[0].replace('data:', '')
135
+ base64_content = url.split(',')[1]
136
+ extension=mime_type.split('/')[-1]
137
+ name = f"file-{uuid.uuid4()}.{extension}"
138
+ return Base64File(
139
+ url=url,
140
+ base64_url=url,
141
+ base64_content=base64_content,
142
+ name=name,
143
+ extension=extension,
144
+ mime_type=mime_type
145
+ )
146
+
147
+ # default download
148
+ _error = None
149
+ try:
150
+ if _content := await from_cache(url):
151
+ return _content
152
+ async with aiohttp.ClientSession() as session:
153
+ async with session.get(url, timeout=aiohttp.ClientTimeout(total=30), headers={"User-Agent": "Mozilla/5.0"}) as response:
154
+ print(f"Downloading {url} - Status: {response.status}")
155
+ response.raise_for_status()
156
+ content = await response.read()
157
+ # mime type detection
158
+ mime_type = response.headers.get('content-type', '').split(';')[0]
159
+ if not mime_type:
160
+ mime_type, _ = mimetypes.guess_type(urlparse(url).path)
161
+ if not mime_type:
162
+ mime_type = 'application/octet-stream'
163
+ # to base64
164
+ base64_content = base64.b64encode(content).decode('utf-8')
165
+ name = url.split('/')[-1]
166
+ extension = name.split('.')[-1]
167
+ except Exception as e:
168
+ _error = f"Failed to download file from {url}: {e}"
169
+ logging.error(_error)
170
+ base64_content = base64.b64encode(_error.encode('utf-8')).decode('utf-8')
171
+ name = "download_error.txt"
172
+ mime_type = "text/plain"
173
+ extension = "txt"
174
+
175
+ _file = Base64File(
176
+ url=url,
177
+ base64_url= f"data:{mime_type};base64,{base64_content}",
178
+ base64_content=base64_content,
179
+ name=name,
180
+ extension=extension,
181
+ mime_type=mime_type
182
+ )
183
+ if not _error:
184
+ await to_cache(_file, url)
185
+ return _file
ws_bom_robot_app/main.py CHANGED
@@ -3,7 +3,7 @@ import platform
3
3
  from fastapi.responses import FileResponse
4
4
  import uvicorn, os, sys
5
5
  from fastapi import FastAPI, Depends
6
- from fastapi.openapi.docs import get_swagger_ui_html
6
+ from fastapi.openapi.docs import get_swagger_ui_html, get_redoc_html
7
7
  from fastapi.openapi.utils import get_openapi
8
8
  from ws_bom_robot_app.auth import authenticate
9
9
  from ws_bom_robot_app.config import config
@@ -31,6 +31,9 @@ async def favicon():
31
31
  @app.get("/docs", include_in_schema=False)
32
32
  async def get_swagger_documentation(authenticate: bool = Depends(authenticate)):
33
33
  return get_swagger_ui_html(openapi_url="/openapi.json", title="docs")
34
+ @app.get("/redoc", include_in_schema=False)
35
+ async def get_redoc_documentation(authenticate: bool = Depends(authenticate)):
36
+ return get_redoc_html(openapi_url="/openapi.json", title="docs")
34
37
  @app.get("/openapi.json", include_in_schema=False)
35
38
  async def openapi(authenticate: bool = Depends(authenticate)):
36
39
  return get_openapi(title=app.title, version=app.version, routes=app.routes)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ws_bom_robot_app
3
- Version: 0.0.78
3
+ Version: 0.0.80
4
4
  Summary: A FastAPI application serving ws bom/robot/llm platform ai.
5
5
  Home-page: https://github.com/websolutespa/bom
6
6
  Author: Websolute Spa
@@ -83,18 +83,30 @@ from ws_bom_robot_app import main
83
83
  app = main.app
84
84
  ```
85
85
 
86
- FIll `.env` with the following code:
87
-
88
- ```env
89
- #robot_env=local/development/production
90
- robot_env=local
91
- robot_user='[user]'
92
- robot_password='[pwd]'
93
- robot_data_folder='./.data'
94
- robot_cms_auth='[auth]'
95
- robot_cms_host='https://[DOMAIN]'
96
- robot_cms_db_folder=llmVectorDb
97
- robot_cms_files_folder=llmKbFile
86
+ Create a `.env` file in the root directory with the following configuration:
87
+
88
+ ```properties
89
+ # robot configuration
90
+ robot_env=development
91
+ robot_user=your_username
92
+ USER_AGENT=ws-bom-robot-app
93
+
94
+ # cms (bowl) configuration
95
+ robot_cms_host='http://localhost:4000'
96
+ robot_cms_auth='users API-Key your-api-key-here'
97
+
98
+ # llm providers: fill one or more of these with your API keys
99
+ DEEPSEEK_API_KEY="your-deepseek-api-key"
100
+ OPENAI_API_KEY="your-openai-api-key"
101
+ GOOGLE_API_KEY="your-google-api-key"
102
+ ANTHROPIC_API_KEY="your-anthropic-api-key"
103
+ GROQ_API_KEY="your-groq-api-key"
104
+ # ibm
105
+ WATSONX_URL="https://eu-gb.ml.cloud.ibm.com"
106
+ WATSONX_APIKEY="your-watsonx-api-key"
107
+ WATSONX_PROJECTID="your-watsonx-project-id"
108
+ # gvertex: ensure to mount the file in docker
109
+ GOOGLE_APPLICATION_CREDENTIALS="./.data/secrets/google-credentials.json"
98
110
  ```
99
111
 
100
112
  ## 🚀 Run the app
@@ -120,15 +132,52 @@ robot_cms_files_folder=llmKbFile
120
132
  #gunicorn -w 4 -k uvicorn.workers.UvicornWorker main:app --bind
121
133
  ```
122
134
 
123
- ### 🔖 Windows requirements
135
+ ## 📖 API documentation
136
+
137
+ - [swagger](http://localhost:6001/docs)
138
+ - [redoc](http://localhost:6001/redoc)
139
+
140
+ ---
141
+
142
+ ## 🐳 Docker
143
+
144
+ dockerize base image
145
+
146
+ ```pwsh
147
+ <# cpu #>
148
+ docker build -f Dockerfile-robot-base-cpu -t ghcr.io/websolutespa/ws-bom-robot-base:cpu .
149
+ docker push ghcr.io/websolutespa/ws-bom-robot-base:cpu
150
+ <# gpu #>
151
+ docker build -f Dockerfile-robot-base-gpu -t ghcr.io/websolutespa/ws-bom-robot-base:gpu .
152
+ docker push ghcr.io/websolutespa/ws-bom-robot-base:gpu
153
+ ```
154
+
155
+ dockerize app
156
+
157
+ ```pwsh
158
+ docker build -f Dockerfile -t ws-bom-robot-app .
159
+ docker run --rm --name ws-bom-robot-app -d --env-file .env -p 6001:6001 ws-bom-robot-app
160
+ ```
161
+
162
+ docker run mounted to src (dev mode)
124
163
 
125
- #### libmagic (mandatory)
164
+ ```pwsh
165
+ docker run --rm --name ws-bom-robot-app-src -d --env-file .env -v "$(pwd)/ws_bom_robot_app:/app/ws_bom_robot_app" -v "$(pwd)/.data:/app/.data" -v "$(pwd)/tmp:/tmp" -p 6001:6001 ws-bom-robot-app fastapi dev ./ws_bom_robot_app/main.py --host 0.0.0.0 --port 6001
166
+ ```
167
+
168
+ ---
169
+
170
+ ## 🔖 Windows requirements (for RAG functionality only)
171
+
172
+ > ⚠️ While it's strongly recommended to use a docker container for development, you can run the app on Windows with the following requirements
173
+
174
+ ### libmagic (mandatory)
126
175
 
127
176
  ```bash
128
177
  py -m pip install --upgrade python-magic-bin
129
178
  ```
130
179
 
131
- #### tesseract-ocr (mandatory)
180
+ ### tesseract-ocr (mandatory)
132
181
 
133
182
  [Install tesseract](https://github.com/UB-Mannheim/tesseract/wiki)
134
183
  [Last win-64 release](https://github.com/tesseract-ocr/tesseract/releases/download/5.5.0/tesseract-ocr-w64-setup-5.5.0.20241111.exe)
@@ -143,15 +192,15 @@ robot_cms_files_folder=llmKbFile
143
192
  }
144
193
  ```
145
194
 
146
- #### docling
147
-
195
+ ### docling
196
+
148
197
  Set the following environment variables
149
198
 
150
199
  ```pwsh
151
200
  KMP_DUPLICATE_LIB_OK=TRUE
152
201
  ```
153
202
 
154
- #### libreoffice (optional: for robot_env set to development/production)
203
+ ### libreoffice (optional: for robot_env set to development/production)
155
204
 
156
205
  [Install libreoffice](https://www.libreoffice.org/download/download-libreoffice/)
157
206
  [Last win-64 release](https://download.documentfoundation.org/libreoffice/stable/24.8.2/win/x86_64/LibreOffice_24.8.2_Win_x86-64.msi)
@@ -166,7 +215,7 @@ robot_cms_files_folder=llmKbFile
166
215
  }
167
216
  ```
168
217
 
169
- #### poppler (optional: for robot_env set to development/production)
218
+ ### poppler (optional: for robot_env set to development/production)
170
219
 
171
220
  [Download win poppler release](https://github.com/oschwartz10612/poppler-windows/releases)
172
221
  Extract the zip, copy the nested folder "poppler-x.x.x." to a program folder (e.g. C:\Program Files\poppler-24.08.0)
@@ -210,7 +259,7 @@ py -m build && twine check dist/*
210
259
  Install the package in editable project location
211
260
 
212
261
  ```pwsh
213
- py -m pip install --upgrade -e .
262
+ py -m pip install -U -e .
214
263
  py -m pip show ws-bom-robot-app
215
264
  ```
216
265
 
@@ -231,7 +280,7 @@ prospector ./ws_bom_robot_app -t dodgy -t bandit
231
280
  prospector ./ws_bom_robot_app -t pyroma
232
281
  ```
233
282
 
234
- lauch pytest
283
+ #### 🧪 run tests
235
284
 
236
285
  ```pwsh
237
286
  !py -m pip install -U pytest pytest-asyncio pytest-mock pytest-cov pyclean
@@ -242,48 +291,24 @@ pytest --cov=ws_bom_robot_app --log-cli-level=info
242
291
  # pytest --cov=ws_bom_robot_app --log-cli-level=info ./tests/app/llm/vector_store/db
243
292
  ```
244
293
 
245
- launch debugger
294
+ #### 🐞 start debugger
246
295
 
247
296
  ```pwsh
248
297
  streamlit run debugger.py --server.port 8051
249
298
  ```
250
299
 
251
- dockerize base image
252
-
253
- ```pwsh
254
- <# cpu #>
255
- docker build -f Dockerfile-robot-base-cpu -t ghcr.io/websolutespa/ws-bom-robot-base:cpu .
256
- docker push ghcr.io/websolutespa/ws-bom-robot-base:cpu
257
- <# gpu #>
258
- docker build -f Dockerfile-robot-base-gpu -t ghcr.io/websolutespa/ws-bom-robot-base:gpu .
259
- docker push ghcr.io/websolutespa/ws-bom-robot-base:gpu
260
- ```
261
-
262
- dockerize app
263
-
264
- ```pwsh
265
- docker build -f Dockerfile -t ws-bom-robot-app .
266
- docker run --rm --name ws-bom-robot-app -d -p 6001:6001 ws-bom-robot-app
267
- ```
268
-
269
- docker run mounted to src
270
-
271
- ```pwsh
272
- docker run --rm --name ws-bom-robot-app-src -d -v "$(pwd)/ws_bom_robot_app:/app/ws_bom_robot_app" -v "$(pwd)/.data:/app/.data" -v "$(pwd)/tmp:/tmp" -p 6001:6001 ws-bom-robot-app
273
- ```
274
-
275
300
  ### ✈️ publish
276
301
 
277
302
  - [testpypi](https://test.pypi.org/project/ws-bom-robot-app/)
278
303
 
279
304
  ```pwsh
280
305
  twine upload --verbose -r testpypi dist/*
281
- #py -m pip install -i https://test.pypi.org/simple/ --upgrade ws-bom-robot-app
306
+ #pip install -i https://test.pypi.org/simple/ -U ws-bom-robot-app
282
307
  ```
283
308
 
284
309
  - [pypi](https://pypi.org/project/ws-bom-robot-app/)
285
310
 
286
311
  ```pwsh
287
312
  twine upload --verbose dist/*
288
- #py -m pip install --upgrade ws-bom-robot-app
313
+
289
314
  ```
@@ -1,8 +1,8 @@
1
1
  ws_bom_robot_app/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  ws_bom_robot_app/auth.py,sha256=84nIbmJsMrNs0sxIQGEHbjsjc2P6ZrZZGSn8dkiL6is,895
3
- ws_bom_robot_app/config.py,sha256=QvoWds1DkBHqd3jAxDQtcmQSN8B6SrpBGERDXhTlswk,4490
4
- ws_bom_robot_app/cron_manager.py,sha256=pFHV7SZtp6GRmmLD9K1Mb1TE9Ev9n5mIiFScrc7tpCo,9221
5
- ws_bom_robot_app/main.py,sha256=1vx0k2fEcE53IC5zcE2EUCwQPcUHM4pvuKSun_E0a9I,6501
3
+ ws_bom_robot_app/config.py,sha256=58dIVRSd-2jpkCfSYmyI5E7zld3GpBr9m3TGh1nxhgU,4624
4
+ ws_bom_robot_app/cron_manager.py,sha256=jk79843WEU-x-rc78O_0KpzWY4AZDBuFRXRwaczXTq8,9370
5
+ ws_bom_robot_app/main.py,sha256=LZH4z9BmVlxpFJf8TrIo_JxH1YhpeZRrrOYgKky7S7w,6712
6
6
  ws_bom_robot_app/task_manager.py,sha256=Q3Il2TtkP0FoG9zHEBu48pZGXzimTtvWQsoH6wdvQs0,16077
7
7
  ws_bom_robot_app/util.py,sha256=RjVD6B9sHje788Lndqq5DHy6TJM0KLs9qx3JYt81Wyk,4834
8
8
  ws_bom_robot_app/llm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -12,17 +12,17 @@ ws_bom_robot_app/llm/agent_handler.py,sha256=TnpfChHLWVQ-gCEHNQPW3UXiuS8AmiP8JYw
12
12
  ws_bom_robot_app/llm/agent_lcel.py,sha256=tVa1JJOuL1CG0tXS5AwOB4gli0E2rGqSBD5oEehHvOY,2480
13
13
  ws_bom_robot_app/llm/api.py,sha256=EUllZaJvtm1CQPxJ6QfufpBhZG_-ew8gSK-vxVg5r58,5054
14
14
  ws_bom_robot_app/llm/defaut_prompt.py,sha256=LlCd_nSMkMmHESfiiiQYfnJyB6Pp-LSs4CEKdYW4vFk,1106
15
- ws_bom_robot_app/llm/main.py,sha256=BXTIfVc9Ck7izZ893qry7C_uz1A8ZupbcHivrZrjpxY,5372
16
- ws_bom_robot_app/llm/nebuly_handler.py,sha256=hbkiTc0Jl4EzwXltpICiUXM5i5wOsmEX_Chyr1NhvSc,7924
15
+ ws_bom_robot_app/llm/main.py,sha256=U_zUcL51VazXUyEicWFoNGkqwV-55s3tn52BlVPINes,5670
16
+ ws_bom_robot_app/llm/nebuly_handler.py,sha256=Z4_GS-N4vQYPLnlXlwhJrwpUvf2uG53diYSOcteXGTc,7978
17
17
  ws_bom_robot_app/llm/feedbacks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
18
  ws_bom_robot_app/llm/feedbacks/feedback_manager.py,sha256=WcKgzlOb8VFG7yqHoIOO_R6LAzdzE4YIRFCVOGBSgfM,2856
19
19
  ws_bom_robot_app/llm/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
20
- ws_bom_robot_app/llm/models/api.py,sha256=qr9BO3NqQ4juYcJGARPiKas5Mj2gIg6gUH9MpmDkMLE,11010
20
+ ws_bom_robot_app/llm/models/api.py,sha256=3fnl9uZDk7SUR53vnoM-YsRdNy2-8M3m2vkQ_LwXiHs,12194
21
21
  ws_bom_robot_app/llm/models/base.py,sha256=1TqxuTK3rjJEALn7lvgoen_1ba3R2brAgGx6EDTtDZo,152
22
22
  ws_bom_robot_app/llm/models/feedback.py,sha256=zh1jLqPRLzNlxInkCMoiJbfSu0-tiOEYHM7FhC46PkM,1692
23
23
  ws_bom_robot_app/llm/models/kb.py,sha256=oVSw6_dmNxikAHrPqcfxDXz9M0ezLIYuxpgvzfs_Now,9514
24
24
  ws_bom_robot_app/llm/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
25
- ws_bom_robot_app/llm/providers/llm_manager.py,sha256=j-AnRh5jZ3eSNOZcmVKO63oBtosXA_blBBrjvJtIkWU,11942
25
+ ws_bom_robot_app/llm/providers/llm_manager.py,sha256=-gP-0tOthxHnwpRh7hvxP93eqpbNYe0iVTk6XKXuJRI,15877
26
26
  ws_bom_robot_app/llm/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
27
27
  ws_bom_robot_app/llm/tools/tool_builder.py,sha256=QTRG1c-EnH4APP10IyfZxEkqK9KitUsutXUvDRKeAhU,3224
28
28
  ws_bom_robot_app/llm/tools/tool_manager.py,sha256=1IgRXxdB7DU3gbIlfT_aMUWZyWuanFTAFwu3VaYKxfE,14990
@@ -32,9 +32,9 @@ ws_bom_robot_app/llm/tools/models/main.py,sha256=1hICqHs-KS2heenkH7b2eH0N2GrPaaN
32
32
  ws_bom_robot_app/llm/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
33
33
  ws_bom_robot_app/llm/utils/agent.py,sha256=_CY5Dji3UeAIi2iuU7ttz4fml1q8aCFgVWOv970x8Fw,1411
34
34
  ws_bom_robot_app/llm/utils/chunker.py,sha256=N7570xBYlObneg-fsvDhPAJ-Pv8C8OaYZOBK6q7LmMI,607
35
+ ws_bom_robot_app/llm/utils/cleanup.py,sha256=TiT4plB4puLyPPDU9MhBdNsjk7kfBXSygoGTEKeKhUM,2888
35
36
  ws_bom_robot_app/llm/utils/cms.py,sha256=XhrLQyHQ2JUOInDCCf_uvR4Jiud0YvH2FwwiiuCnnsg,6352
36
- ws_bom_robot_app/llm/utils/download.py,sha256=iAUxH_NiCpTPtGzhC4hBtxotd2HPFt2MBhttslIxqiI,3194
37
- ws_bom_robot_app/llm/utils/kb.py,sha256=jja45WCbNI7SGEgqDS99nErlwB5eY8Ga7BMnhdMHZ90,1279
37
+ ws_bom_robot_app/llm/utils/download.py,sha256=OBtLEALcjRKTutadnqnJ_F_dD5dT3OOS_rdhf45jj58,7268
38
38
  ws_bom_robot_app/llm/utils/print.py,sha256=IsPYEWRJqu-dqlJA3F9OnnIS4rOq_EYX1Ljp3BvDnww,774
39
39
  ws_bom_robot_app/llm/utils/secrets.py,sha256=-HtqLIDVIJrpvGC5YhPAVyLsq8P4ChVM5g3GOfdwqVk,878
40
40
  ws_bom_robot_app/llm/utils/webhooks.py,sha256=LAAZqyN6VhV13wu4X-X85TwdDgAV2rNvIwQFIIc0FJM,2114
@@ -66,7 +66,7 @@ ws_bom_robot_app/llm/vector_store/loader/__init__.py,sha256=47DEQpj8HBSa-_TImW-5
66
66
  ws_bom_robot_app/llm/vector_store/loader/base.py,sha256=L_ugekNuAq0N9O-24wtlHSNHkqSeD-KsJrfGt_FX9Oc,5340
67
67
  ws_bom_robot_app/llm/vector_store/loader/docling.py,sha256=yP0zgXLeFAlByaYuj-6cYariuknckrFds0dxdRcnVz8,3456
68
68
  ws_bom_robot_app/llm/vector_store/loader/json_loader.py,sha256=qo9ejRZyKv_k6jnGgXnu1W5uqsMMtgqK_uvPpZQ0p74,833
69
- ws_bom_robot_app-0.0.78.dist-info/METADATA,sha256=96-Ate6TbTUzRqNCLEe6gJEblJOA4r9BoVY6Ajbb2_4,8609
70
- ws_bom_robot_app-0.0.78.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
71
- ws_bom_robot_app-0.0.78.dist-info/top_level.txt,sha256=Yl0akyHVbynsBX_N7wx3H3ZTkcMLjYyLJs5zBMDAKcM,17
72
- ws_bom_robot_app-0.0.78.dist-info/RECORD,,
69
+ ws_bom_robot_app-0.0.80.dist-info/METADATA,sha256=nRY0FtH8gCw1Jr7rRS-r_WVqJnmsz1y7_HXTOxEizyk,9499
70
+ ws_bom_robot_app-0.0.80.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
71
+ ws_bom_robot_app-0.0.80.dist-info/top_level.txt,sha256=Yl0akyHVbynsBX_N7wx3H3ZTkcMLjYyLJs5zBMDAKcM,17
72
+ ws_bom_robot_app-0.0.80.dist-info/RECORD,,
@@ -1,34 +0,0 @@
1
- import os
2
- from ws_bom_robot_app.config import config
3
- from datetime import datetime, timedelta
4
- from ws_bom_robot_app.util import _log
5
-
6
- def kb_cleanup_data_file() -> dict:
7
- """
8
- clean up old data files in the specified folder
9
-
10
- Returns:
11
- - Dictionary with cleanup statistics
12
- """
13
- _deleted_files = []
14
- _freed_space = 0
15
- folder = os.path.join(config.robot_data_folder, config.robot_data_db_folder, config.robot_data_db_folder_out)
16
-
17
- for root, dirs, files in os.walk(folder, topdown=False):
18
- for file in files:
19
- file_path = os.path.join(root, file)
20
- file_stat = os.stat(file_path)
21
- file_creation_time = datetime.fromtimestamp(file_stat.st_ctime)
22
- if file_creation_time < datetime.now() - timedelta(days=config.robot_data_db_retention_days):
23
- _freed_space += file_stat.st_size
24
- os.remove(file_path)
25
- _deleted_files.append(file_path)
26
- if not os.listdir(root):
27
- os.rmdir(root)
28
-
29
- _log.info(f"Deleted {len(_deleted_files)} files; Freed space: {_freed_space / (1024 * 1024):.2f} MB")
30
-
31
- return {
32
- "deleted_files_count": len(_deleted_files),
33
- "freed_space_mb": _freed_space / (1024 * 1024)
34
- }