ws-bom-robot-app 0.0.10__tar.gz → 0.0.12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/PKG-INFO +2 -2
  2. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/setup.py +1 -1
  3. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/config.py +30 -1
  4. ws_bom_robot_app-0.0.12/ws_bom_robot_app/cron_manager.py +251 -0
  5. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/models/api.py +2 -2
  6. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/models/kb.py +1 -1
  7. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/utils/webhooks.py +1 -0
  8. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/vector_store/generator.py +1 -1
  9. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/vector_store/integration/sitemap.py +23 -20
  10. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/vector_store/loader/base.py +2 -1
  11. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/vector_store/loader/json_loader.py +3 -4
  12. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/main.py +13 -2
  13. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/requirements.txt +1 -1
  14. ws_bom_robot_app-0.0.12/ws_bom_robot_app/task_manager.py +387 -0
  15. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app.egg-info/PKG-INFO +2 -2
  16. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app.egg-info/requires.txt +1 -1
  17. ws_bom_robot_app-0.0.10/ws_bom_robot_app/cron_manager.py +0 -99
  18. ws_bom_robot_app-0.0.10/ws_bom_robot_app/task_manager.py +0 -151
  19. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/README.md +0 -0
  20. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/pyproject.toml +0 -0
  21. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/setup.cfg +0 -0
  22. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/__init__.py +0 -0
  23. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/auth.py +0 -0
  24. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/__init__.py +0 -0
  25. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/agent_description.py +0 -0
  26. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/agent_handler.py +0 -0
  27. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/agent_lcel.py +0 -0
  28. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/api.py +0 -0
  29. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/defaut_prompt.py +0 -0
  30. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/main.py +0 -0
  31. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/models/__init__.py +0 -0
  32. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/models/base.py +0 -0
  33. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/settings.py +0 -0
  34. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/tools/__init__.py +0 -0
  35. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/tools/models/__init__.py +0 -0
  36. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/tools/models/main.py +0 -0
  37. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/tools/tool_builder.py +0 -0
  38. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/tools/tool_manager.py +0 -0
  39. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/tools/utils.py +0 -0
  40. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/utils/__init__.py +0 -0
  41. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/utils/agent_utils.py +0 -0
  42. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/utils/download.py +0 -0
  43. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/utils/faiss_helper.py +0 -0
  44. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/utils/kb.py +0 -0
  45. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/utils/print.py +0 -0
  46. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/vector_store/__init__.py +0 -0
  47. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/vector_store/integration/__init__.py +0 -0
  48. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/vector_store/integration/base.py +0 -0
  49. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/vector_store/integration/manager.py +0 -0
  50. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/vector_store/loader/__init__.py +0 -0
  51. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/util.py +0 -0
  52. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app.egg-info/SOURCES.txt +0 -0
  53. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app.egg-info/dependency_links.txt +0 -0
  54. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ws_bom_robot_app
3
- Version: 0.0.10
3
+ Version: 0.0.12
4
4
  Summary: A FastAPI application serving ws bom/robot/llm platform ai.
5
5
  Home-page: https://github.com/websolutespa/bom
6
6
  Author: Websolute Spa
@@ -11,7 +11,7 @@ Classifier: Operating System :: OS Independent
11
11
  Requires-Python: >=3.12
12
12
  Description-Content-Type: text/markdown
13
13
  Requires-Dist: standardwebhooks==1.0.0
14
- Requires-Dist: schedule==1.2.2
14
+ Requires-Dist: apscheduler==3.11.0
15
15
  Requires-Dist: aiofiles==24.1.0
16
16
  Requires-Dist: pydantic==2.9.2
17
17
  Requires-Dist: pydantic-settings==2.6.0
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
2
2
 
3
3
  setup(
4
4
  name="ws_bom_robot_app",
5
- version="0.0.10",
5
+ version="0.0.12",
6
6
  description="A FastAPI application serving ws bom/robot/llm platform ai.",
7
7
  long_description=open("README.md", encoding='utf-8').read(),
8
8
  long_description_content_type="text/markdown",
@@ -1,6 +1,7 @@
1
+ from typing import Optional
1
2
  from pydantic import BaseModel, ConfigDict
2
3
  from pydantic_settings import BaseSettings
3
-
4
+ import os
4
5
 
5
6
  class Settings(BaseSettings):
6
7
  robot_env: str = 'local'
@@ -12,6 +13,7 @@ class Settings(BaseSettings):
12
13
  robot_data_db_folder_out: str = 'out'
13
14
  robot_data_db_folder_store: str = 'store'
14
15
  robot_data_db_retention_days: float = 60
16
+ robot_task_max_concurrent: int = os.cpu_count() or 1
15
17
  robot_task_retention_days: float = 1
16
18
  robot_cms_host: str = ''
17
19
  robot_cms_auth: str = ''
@@ -25,10 +27,37 @@ class Settings(BaseSettings):
25
27
  )
26
28
 
27
29
  class RuntimeOptions(BaseModel):
30
+ @staticmethod
31
+ def _get_number_of_workers() -> int:
32
+ """
33
+ Returns the number of worker processes to use for the application.
34
+
35
+ This function inspects the command-line arguments to determine the number
36
+ of worker processes to use. It looks for the "--workers" argument and
37
+ returns the subsequent value as an integer.
38
+ Sample of command-line arguments:
39
+ fastapi dev main.py --port 6001
40
+ fastapi run main.py --port 6001 --workers 4
41
+ uvicorn main:app --port 6001 --workers 4
42
+
43
+ Returns:
44
+ Optional[int]: The number of worker processes to use, or 1 if
45
+ the argument is not found or the value is invalid.
46
+ """
47
+ import sys
48
+ try:
49
+ for i, arg in enumerate(sys.argv):
50
+ if arg == "--workers" and i + 1 < len(sys.argv):
51
+ return int(sys.argv[i + 1])
52
+ except (ValueError, IndexError):
53
+ pass
54
+ return 1
28
55
  debug: bool
29
56
  loader_strategy: str
30
57
  loader_show_progress: bool
31
58
  loader_silent_errors: bool
59
+ number_of_workers: int = _get_number_of_workers()
60
+ is_multi_process: bool = _get_number_of_workers() > 1
32
61
 
33
62
 
34
63
  def runtime_options(self) -> RuntimeOptions:
@@ -0,0 +1,251 @@
1
+ from apscheduler.schedulers.background import BackgroundScheduler
2
+ #from apscheduler.schedulers.asyncio import AsyncIOScheduler
3
+ from apscheduler.jobstores.memory import MemoryJobStore
4
+ from apscheduler.jobstores.sqlalchemy import SQLAlchemyJobStore
5
+ from apscheduler.triggers.cron import CronTrigger
6
+ from apscheduler.triggers.interval import IntervalTrigger
7
+ from apscheduler.triggers.date import DateTrigger
8
+ from fastapi import APIRouter
9
+ from datetime import datetime
10
+ from ws_bom_robot_app.task_manager import task_manager
11
+ from ws_bom_robot_app.llm.utils.kb import kb_cleanup_data_file
12
+ from ws_bom_robot_app.util import _log
13
+ from ws_bom_robot_app.config import config
14
+
15
class JobstoreStrategy:
    """Abstract strategy that supplies the APScheduler jobstore configuration."""

    def get_jobstore(self):
        """Return a mapping of jobstore aliases to jobstore instances."""
        raise NotImplementedError("Subclasses should implement this method")
18
+
19
class MemoryJobstoreStrategy(JobstoreStrategy):
    """Jobstore strategy backed by a non-persistent, in-process memory store."""

    def get_jobstore(self):
        """Return the default jobstore mapping using an in-memory store."""
        _log.info("Using in-memory cron jobstore.")
        jobstores = {"default": MemoryJobStore()}
        return jobstores
23
+
24
class PersistentJobstoreStrategy(JobstoreStrategy):
    """Jobstore strategy backed by a SQLAlchemy store (SQLite file by default).

    Jobs persisted here survive process restarts; note that sharing the same
    database between multiple scheduler processes is not coordinated.
    """

    def get_jobstore(self, db_url: str = "sqlite:///.data/db/jobs.sqlite"):
        """Return the default jobstore mapping persisted at *db_url*.

        Args:
            db_url: SQLAlchemy database URL for the job store.
        """
        # fix: log message previously read "crob" instead of "cron"
        _log.info(f"Using persistent cron jobstore with database URL: {db_url}.")
        return {"default": SQLAlchemyJobStore(url=db_url)}
28
+
29
class Job:
    """Schedulable unit supporting cron, interval and one-time (date) triggers."""

    def __init__(self, name: str, job_func, args: list = None, kwargs: dict = None, cron_expression: str = None, interval: int = None, run_at: datetime = None):
        """
        Job that supports cron-style, fixed-interval and one-time scheduling.

        :param name: Unique identifier of the job.
        :param job_func: The function to execute.
        :param args: Positional arguments passed to ``job_func``.
        :param kwargs: Keyword arguments passed to ``job_func``.
        :param cron_expression: Crontab expression for cron-style recurring jobs.
        :param interval: Interval in seconds for recurring jobs.
        :param run_at: Specific datetime for one-time jobs.
        :raises ValueError: If none of the scheduling options is provided.
        """
        if not (cron_expression or interval or run_at):
            # fix: message now lists every accepted scheduling option
            raise ValueError("One of 'cron_expression', 'interval' or 'run_at' must be provided.")
        self.name = name
        self.job_func = job_func
        self.args: list = args or []
        self.kwargs: dict = kwargs or {}
        self.cron_expression = cron_expression
        self.interval = interval
        self.run_at = run_at

    def create_trigger(self):
        """Create the appropriate APScheduler trigger for this job.

        Precedence: cron expression, then interval, then one-time date
        (the constructor guarantees at least one of them is set).
        """
        if self.cron_expression:
            return CronTrigger.from_crontab(self.cron_expression)
        if self.interval:
            return IntervalTrigger(seconds=self.interval)
        return DateTrigger(run_date=self.run_at)
56
+
57
class CronManager:
    """Thin wrapper around APScheduler's BackgroundScheduler.

    Manages named jobs and optionally registers a default set of
    maintenance jobs (task cleanup and knowledge-base data cleanup).
    """

    # Default maintenance jobs registered by start() when enable_defaults is True.
    _list_default = [
        Job('cleanup-task', task_manager.cleanup_task, interval=5 * 60),
        Job('cleanup-data', kb_cleanup_data_file, interval=180 * 60),
    ]

    @staticmethod
    def __get_jobstore_strategy() -> JobstoreStrategy:
        # fix: removed dead "if True or config.runtime_options().is_multi_process"
        # debug leftover. The in-memory store is deliberately forced for now:
        # a persistent (SQLAlchemy) store is unsafe when several worker
        # processes each run their own scheduler against the same database.
        return MemoryJobstoreStrategy()

    def __init__(self, strategy: JobstoreStrategy = None, enable_defaults: bool = True):
        """
        :param strategy: Jobstore strategy; defaults to the strategy chosen
            by ``__get_jobstore_strategy()``.
        :param enable_defaults: Register the default maintenance jobs on start().
        """
        self.enable_defaults = enable_defaults
        if strategy is None:
            strategy = CronManager.__get_jobstore_strategy()
        jobstores = strategy.get_jobstore()
        self.scheduler: BackgroundScheduler = BackgroundScheduler(jobstores=jobstores)
        self.__scheduler_is_running = False

    def add_job(self, job: Job):
        """
        Add a job to the scheduler; skip creation if a job with the same
        name already exists.

        Args:
            job (Job): Job definition (function, arguments and trigger spec).
        Sample usage:
          recurring_job = Job(name="sample-recurring-job", job_func=example_job, interval=5, args=args, kwargs=kwargs)
          cron_manager.add_job(recurring_job)
          fire_once_job = Job(name="sample-fire-once-job", job_func=example_job, run_at=datetime.now(), args=args, kwargs=kwargs)
          cron_manager.add_job(fire_once_job)
        """
        if self.scheduler.get_job(job.name):
            _log.info(f"Job with name '{job.name}' already exists. Skip creation.")
        else:
            self.scheduler.add_job(
                func=job.job_func,
                args=job.args,
                kwargs=job.kwargs,
                trigger=job.create_trigger(),
                id=job.name,
                name=job.name,
                replace_existing=True
            )

    def start(self):
        """Start the scheduler (idempotent) and register the default jobs."""
        if not self.__scheduler_is_running:
            self.__scheduler_is_running = True
            self.scheduler.start()
        if self.enable_defaults:
            # add_job already skips jobs that are registered, so no extra
            # existence check is needed here.
            for job in CronManager._list_default:
                self.add_job(job)

    def get_job(self, job_id: str):
        return self.scheduler.get_job(job_id)

    def get_jobs(self):
        return self.scheduler.get_jobs()

    def execute_job(self, job_id: str):
        """Run a job's function immediately, outside of its schedule.

        :raises ValueError: If no job with the given id exists.
        """
        job = self.scheduler.get_job(job_id)
        if job:
            # fix: pass the job's stored arguments instead of calling bare func()
            job.func(*job.args, **job.kwargs)
        else:
            raise ValueError(f"Job with id '{job_id}' not found.")

    def pause_job(self, job_id: str):
        self.scheduler.pause_job(job_id)

    def resume_job(self, job_id: str):
        self.scheduler.resume_job(job_id)

    def remove_job(self, job_id: str):
        self.scheduler.remove_job(job_id)

    def _recurring_jobs(self):
        """Return scheduled jobs driven by an interval trigger.

        fix: scheduled APScheduler jobs expose no 'interval' attribute; the
        previous checks on job.interval raised AttributeError. The trigger
        type is inspected instead.
        """
        return [job for job in self.scheduler.get_jobs() if isinstance(job.trigger, IntervalTrigger)]

    def execute_recurring_jobs(self):
        """Run every interval-based job immediately."""
        for job in self._recurring_jobs():
            # fix: scheduled jobs expose 'func', not 'job_func'
            job.func(*job.args, **job.kwargs)

    def pause_recurring_jobs(self):
        for job in self._recurring_jobs():
            self.pause_job(job.id)

    def resume_recurring_jobs(self):
        for job in self._recurring_jobs():
            self.resume_job(job.id)

    def remove_recurring_jobs(self):
        for job in self._recurring_jobs():
            self.remove_job(job.id)

    def clear(self):
        """Remove all jobs and allow start() to run the startup path again.

        NOTE(review): the scheduler thread itself keeps running; only the
        jobs are removed — confirm this is the intended "stop" semantics.
        """
        self.__scheduler_is_running = False
        self.scheduler.remove_all_jobs()

    def shutdown(self):
        """Shut the scheduler down permanently."""
        self.scheduler.shutdown()
166
+
167
# Module-level singleton used by the API routes below.
cron_manager = CronManager()

# FastAPI Routes
router = APIRouter(prefix="/api/cron", tags=["cron"])

@router.get("/list")
def _list():
    """Describe every job currently known to the scheduler."""
    return [
        {
            "id": job.id,
            "name": job.name,
            "func": job.func_ref,
            "pending": job.pending,
            "trigger": str(job.trigger),
            "next_run_time": job.next_run_time
        }
        for job in cron_manager.get_jobs()
    ]

@router.get("/default-jobs")
def _default_jobs():
    """Report whether each default job is currently registered."""
    report = []
    for job in CronManager._list_default:
        registered = cron_manager.scheduler.get_job(job.name)
        report.append({
            "name": job.name,
            "status": "exists" if registered else "not added"
        })
    return report

@router.post("/execute-job/{job_id}")
def _execute_job(job_id: str):
    """Run a single job immediately; report an error for unknown ids."""
    try:
        cron_manager.execute_job(job_id)
    except ValueError as e:
        return {"error": str(e)}
    return {"status": f"Job {job_id} executed"}

@router.post("/pause-job/{job_id}")
def _pause_job(job_id: str):
    """Pause one job by id."""
    cron_manager.pause_job(job_id)
    return {"status": f"Job {job_id} paused"}

@router.post("/resume-job/{job_id}")
def _resume_job(job_id: str):
    """Resume one paused job by id."""
    cron_manager.resume_job(job_id)
    return {"status": f"Job {job_id} resumed"}

@router.delete("/remove-job/{job_id}")
def _remove_job(job_id: str):
    """Remove one job by id."""
    cron_manager.remove_job(job_id)
    return {"status": f"Job {job_id} removed"}

@router.post("/execute-recurring")
def _execute_recurring():
    """Run all interval-based jobs immediately."""
    cron_manager.execute_recurring_jobs()
    return {"status": "All recurring jobs executed"}

@router.post("/pause-recurring")
def _pause_recurring():
    """Pause all interval-based jobs."""
    cron_manager.pause_recurring_jobs()
    return {"status": "All recurring jobs paused"}

@router.post("/resume-recurring")
def _resume_recurring():
    """Resume all interval-based jobs."""
    cron_manager.resume_recurring_jobs()
    return {"status": "All recurring jobs resumed"}

@router.delete("/remove-recurring")
def _remove_recurring():
    """Remove all interval-based jobs."""
    cron_manager.remove_recurring_jobs()
    return {"status": "All recurring jobs removed"}

@router.get("/start")
def _start():
    """Start the scheduler and register default jobs."""
    cron_manager.start()
    return {"status": "started"}

@router.delete("/stop")
def _stop():
    """Remove all jobs (scheduler thread keeps running)."""
    cron_manager.clear()
    return {"status": "stopped"}

@router.get("/shutdown")
def _shutdown():
    """Shut the scheduler down permanently."""
    cron_manager.shutdown()
    return {"status": "shutdown"}
@@ -1,4 +1,4 @@
1
- from typing import List, Dict, Optional
1
+ from typing import List, Dict, Optional, Union
2
2
  from datetime import datetime
3
3
  from pydantic import AliasChoices, BaseModel, Field, ConfigDict
4
4
  from ws_bom_robot_app.llm.models.kb import LlmKbEndpoint, LlmKbIntegration
@@ -125,7 +125,7 @@ class VectorDbRequest(BaseModel):
125
125
  def api_key(self):
126
126
  return self.secrets.get("openAIApiKey", "")
127
127
  def out_name(self):
128
- return "vector_db_" + datetime.now().strftime("%Y-%m-%d_%H-%M-%S-%f")[:-3]
128
+ return f"db_{datetime.now().strftime("%Y-%m-%d_%H-%M-%S-%f")[:-3]}_{os.getpid()}"
129
129
 
130
130
  class RulesRequest(VectorDbRequest):
131
131
  type: Optional[str] = 'rules'
@@ -144,7 +144,7 @@ async def load_endpoints(endpoints: list[LlmKbEndpoint], destination_directory:
144
144
  documents = await JsonLoader(
145
145
  file_path,
146
146
  meta_fields=[field.name for field in endpoint.fields_mapping.meta_fields] if endpoint.fields_mapping.meta_fields else []
147
- ).load()
147
+ ).aload()
148
148
  _documents.extend(documents)
149
149
  await aiofiles.os.remove(file_path)
150
150
  except Exception as e:
@@ -51,3 +51,4 @@ class WebhookNotifier:
51
51
  async with httpx.AsyncClient(headers=_headers,verify=False,timeout=timeout) as client:
52
52
  response = await client.post(endpoint, data=_data)
53
53
  response.raise_for_status()
54
+
@@ -9,7 +9,7 @@ from ws_bom_robot_app.llm.vector_store.integration.manager import IntegrationMan
9
9
  from ws_bom_robot_app.llm.utils.faiss_helper import FaissHelper
10
10
  from ws_bom_robot_app.util import timer
11
11
 
12
- @timer
12
+ #@timer
13
13
  async def rules(rq: RulesRequest) -> VectorDbResponse:
14
14
  api_key = rq.api_key()
15
15
  _config = rq.config()
@@ -1,3 +1,4 @@
1
+ from typing import Any
1
2
  import aiofiles
2
3
  import aiofiles.os
3
4
  from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy
@@ -5,23 +6,21 @@ from langchain_community.document_loaders.sitemap import SitemapLoader
5
6
  from langchain_community.document_transformers import MarkdownifyTransformer as markdownify
6
7
  from langchain_core.documents import Document
7
8
  from bs4 import BeautifulSoup, Tag
8
- import nest_asyncio, os
9
-
10
9
 
11
10
  class Sitemap(IntegrationStrategy):
12
- """_summary_
11
+ """Class to load a sitemap.xml file and extract text from the URLs.
13
12
  Load a sitemap.xml file and extract text from the urls.
14
13
  Args:
15
14
  data (dict[str, str]):
16
15
  data["sitemapUrl"] (str): absolute/relative url of the sitemap.xml
17
16
  data["outputFormat"] (str): ["text", "html", "markdown"] default to "text"
18
- data["filterUrls"] list: list of regex pattern to filter urls ["https://www.example.com/en/products", "^.*products.*$"]
19
- data["includeOnlySelector"] : [".content", "#main-article", "article p"]
20
- data["excludeTag"] (str): default to ["script", "noscript", "style", "head", "header","nav","footer", "iframe"]
21
- data["excludeClass"] (str): ["class1", "class2"]
22
- data["excludeId"] (str): ["id1", "id2"]
17
+ data["filterUrls"] list[str]: list of regex pattern to filter urls ["https://www.example.com/en/products", "^.*products.*$"]
18
+ data["includeOnlySelector"] : list[str] [".content", "#main-article", "article p"]
19
+ data["excludeTag"] (list[str]): default to ["script", "noscript", "style", "head", "header","nav","footer", "iframe"]
20
+ data["excludeClass"] (list[str]): ["class1", "class2"]
21
+ data["excludeId"] (list[str]): ["id1", "id2"]
23
22
  """
24
- def __init__(self, knowledgebase_path: str, data: dict[str, str]):
23
+ def __init__(self, knowledgebase_path: str, data: dict[str, Any]):
25
24
  super().__init__(knowledgebase_path, data)
26
25
  self.__sitemap_url = self.data.get("sitemapUrl")
27
26
  self.__filter_urls: list[str] = self.data.get("filterUrls",[]) # type: ignore
@@ -30,12 +29,12 @@ class Sitemap(IntegrationStrategy):
30
29
  self.__exclude_tag: list[str] = self.data.get("excludeTag",[]) # type: ignore
31
30
  self.__exclude_class: list[str] = self.data.get("excludeClass",[]) # type: ignore
32
31
  self.__exclude_id: list[str] = self.data.get("excludeId",[]) # type: ignore
33
- def working_subdirectory(self) -> str: # type: ignore
32
+ def working_subdirectory(self) -> str:
34
33
  return ""
35
34
  def _extract(self, tag: Tag) -> str:
36
35
  return tag.get_text() if self.__output_format == "text" else tag.prettify()
37
36
  def _output(self, documents: list[Document]) -> list[Document]:
38
- return list(markdownify().transform_documents(documents)) if (self.__output_format == "markdown") else documents
37
+ return list(markdownify().transform_documents(documents)) if self.__output_format == "markdown" else documents
39
38
  def _parse(self,content: BeautifulSoup) -> str:
40
39
  if self.__include_only_selectors:
41
40
  extracted = []
@@ -55,21 +54,25 @@ class Sitemap(IntegrationStrategy):
55
54
  for _ in content.select(element):
56
55
  _.decompose()
57
56
  return str(self._extract(content))
58
- async def load(self) -> list[Document]:
59
- def _is_local(url: str) -> bool:
60
- return not url.startswith("http")
61
- def _remap_if_local(url: str) -> str:
62
- return f"{self.knowledgebase_path}/{url}" if _is_local(url) else url
57
+ def _is_local(self, url: str) -> bool:
58
+ return not url.startswith("http")
63
59
 
60
+ def _remap_if_local(self, url: str) -> str:
61
+ return f"{self.knowledgebase_path}/{url}" if self._is_local(url) else url
62
+
63
+ async def load(self) -> list[Document]:
64
64
  if (self.__sitemap_url):
65
65
  _loader = SitemapLoader(
66
- web_path=_remap_if_local(self.__sitemap_url),
66
+ web_path=self._remap_if_local(self.__sitemap_url),
67
67
  filter_urls=self.__filter_urls,
68
68
  parsing_function=self._parse,
69
- is_local=_is_local(self.__sitemap_url)
69
+ is_local=self._is_local(self.__sitemap_url)
70
70
  )
71
71
  _docs = self._output([document async for document in _loader.alazy_load()])
72
- if _is_local(self.__sitemap_url):
73
- await aiofiles.os.remove(_loader.web_path)
72
+ if self._is_local(self.__sitemap_url):
73
+ try:
74
+ await aiofiles.os.remove(_loader.web_path)
75
+ except FileNotFoundError:
76
+ pass
74
77
  return _docs
75
78
  return []
@@ -109,4 +109,5 @@ class Loader():
109
109
  #@timer
110
110
  async def load(self) -> list[Document]:
111
111
  loaders = MergedDataLoader(self.__directory_loader())
112
- return await asyncio.to_thread(loaders.load)
112
+ return await loaders.aload()
113
+ #return await asyncio.to_thread(loaders.load)
@@ -2,7 +2,6 @@ import json
2
2
  from typing import Optional
3
3
  from langchain_core.documents import Document
4
4
  from langchain_community.document_loaders.base import BaseLoader
5
- import aiofiles
6
5
 
7
6
  class JsonLoader(BaseLoader):
8
7
  def __init__(self, file_path: str, meta_fields:Optional[list[str]] = [],encoding: Optional[str] = "utf-8"):
@@ -10,9 +9,9 @@ class JsonLoader(BaseLoader):
10
9
  self.meta_fields = meta_fields
11
10
  self.encoding = encoding
12
11
 
13
- async def load(self) -> list[Document]:
14
- async with aiofiles.open(self.file_path, "r", encoding=self.encoding) as file:
15
- data = json.loads(await file.read())
12
+ def load(self) -> list[Document]:
13
+ with open(self.file_path, "r", encoding=self.encoding) as file:
14
+ data = json.load(file)
16
15
  _list = data if isinstance(data, list) else [data]
17
16
  return [
18
17
  Document(
@@ -1,5 +1,6 @@
1
1
  import datetime
2
2
  import platform
3
+ from fastapi.responses import FileResponse
3
4
  import uvicorn, os, sys
4
5
  from fastapi import FastAPI, Depends
5
6
  from fastapi.openapi.docs import get_swagger_ui_html
@@ -22,7 +23,10 @@ app.include_router(cron,dependencies=[Depends(authenticate)])
22
23
 
23
24
  @app.get("/")
24
25
  async def root():
25
- return {}
26
+ return health()
27
+ @app.get("/favicon.ico")
28
+ async def favicon():
29
+ return FileResponse("./favicon.ico")
26
30
 
27
31
  @app.get("/docs", include_in_schema=False)
28
32
  async def get_swagger_documentation(authenticate: bool = Depends(authenticate)):
@@ -31,7 +35,11 @@ async def get_swagger_documentation(authenticate: bool = Depends(authenticate)):
31
35
  async def openapi(authenticate: bool = Depends(authenticate)):
32
36
  return get_openapi(title=app.title, version=app.version, routes=app.routes)
33
37
 
34
- @app.get("/diag",tags=["diag"])
38
+ @app.get("/api/health",tags=["diag"])
39
+ def health():
40
+ return {"status": "ok"}
41
+
42
+ @app.get("/api/diag",tags=["diag"])
35
43
  def diag(authenticate: bool = Depends(authenticate)):
36
44
  import pkg_resources
37
45
  from ws_bom_robot_app.llm.vector_store.loader.base import Loader as wsll
@@ -48,6 +56,7 @@ def diag(authenticate: bool = Depends(authenticate)):
48
56
  "version": platform.version(),
49
57
  "type": platform.machine(),
50
58
  "processor": platform.processor(),
59
+ "cpu": os.cpu_count(),
51
60
  "architecture": platform.architecture()
52
61
  },
53
62
  "sys": {
@@ -57,6 +66,7 @@ def diag(authenticate: bool = Depends(authenticate)):
57
66
  "args": {k: arg for k, arg in enumerate(sys.argv)}
58
67
  },
59
68
  "os": {
69
+ "ppid": os.getppid(),
60
70
  "pid": os.getpid(),
61
71
  "cwd": os.getcwd(),
62
72
  "ws_bom_robot_app": pkg_resources.get_distribution("ws_bom_robot_app").version,
@@ -64,6 +74,7 @@ def diag(authenticate: bool = Depends(authenticate)):
64
74
  },
65
75
  },
66
76
  "config":config,
77
+ "runtime":config.runtime_options(),
67
78
  "extension": {
68
79
  "loader": ({item[0]: item[1].loader.__name__ if item[1] else None} for item in sorted(wsll._list.items(), key=lambda x: x[0]) if item[1]),
69
80
  "integration":({item[0]: type(item[1]).__name__} for item in wsim._list.items()),
@@ -1,6 +1,6 @@
1
1
  #app
2
2
  standardwebhooks==1.0.0
3
- schedule==1.2.2
3
+ apscheduler==3.11.0
4
4
  aiofiles==24.1.0
5
5
  pydantic==2.9.2
6
6
  pydantic-settings==2.6.0