ws-bom-robot-app 0.0.11__tar.gz → 0.0.12__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/PKG-INFO +1 -1
  2. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/setup.py +1 -1
  3. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/config.py +16 -16
  4. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/cron_manager.py +3 -2
  5. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/models/api.py +2 -2
  6. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/models/kb.py +1 -1
  7. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/vector_store/generator.py +1 -1
  8. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/vector_store/integration/sitemap.py +23 -19
  9. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/vector_store/loader/base.py +2 -1
  10. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/vector_store/loader/json_loader.py +3 -4
  11. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/main.py +1 -0
  12. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/task_manager.py +118 -42
  13. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app.egg-info/PKG-INFO +1 -1
  14. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/README.md +0 -0
  15. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/pyproject.toml +0 -0
  16. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/setup.cfg +0 -0
  17. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/__init__.py +0 -0
  18. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/auth.py +0 -0
  19. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/__init__.py +0 -0
  20. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/agent_description.py +0 -0
  21. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/agent_handler.py +0 -0
  22. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/agent_lcel.py +0 -0
  23. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/api.py +0 -0
  24. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/defaut_prompt.py +0 -0
  25. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/main.py +0 -0
  26. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/models/__init__.py +0 -0
  27. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/models/base.py +0 -0
  28. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/settings.py +0 -0
  29. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/tools/__init__.py +0 -0
  30. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/tools/models/__init__.py +0 -0
  31. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/tools/models/main.py +0 -0
  32. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/tools/tool_builder.py +0 -0
  33. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/tools/tool_manager.py +0 -0
  34. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/tools/utils.py +0 -0
  35. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/utils/__init__.py +0 -0
  36. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/utils/agent_utils.py +0 -0
  37. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/utils/download.py +0 -0
  38. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/utils/faiss_helper.py +0 -0
  39. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/utils/kb.py +0 -0
  40. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/utils/print.py +0 -0
  41. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/utils/webhooks.py +0 -0
  42. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/vector_store/__init__.py +0 -0
  43. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/vector_store/integration/__init__.py +0 -0
  44. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/vector_store/integration/base.py +0 -0
  45. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/vector_store/integration/manager.py +0 -0
  46. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/vector_store/loader/__init__.py +0 -0
  47. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/requirements.txt +0 -0
  48. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/util.py +0 -0
  49. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app.egg-info/SOURCES.txt +0 -0
  50. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app.egg-info/dependency_links.txt +0 -0
  51. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app.egg-info/requires.txt +0 -0
  52. {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ws_bom_robot_app
3
- Version: 0.0.11
3
+ Version: 0.0.12
4
4
  Summary: A FastAPI application serving ws bom/robot/llm platform ai.
5
5
  Home-page: https://github.com/websolutespa/bom
6
6
  Author: Websolute Spa
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
2
2
 
3
3
  setup(
4
4
  name="ws_bom_robot_app",
5
- version="0.0.11",
5
+ version="0.0.12",
6
6
  description="A FastAPI application serving ws bom/robot/llm platform ai.",
7
7
  long_description=open("README.md", encoding='utf-8').read(),
8
8
  long_description_content_type="text/markdown",
@@ -1,7 +1,7 @@
1
1
  from typing import Optional
2
2
  from pydantic import BaseModel, ConfigDict
3
3
  from pydantic_settings import BaseSettings
4
-
4
+ import os
5
5
 
6
6
  class Settings(BaseSettings):
7
7
  robot_env: str = 'local'
@@ -13,6 +13,7 @@ class Settings(BaseSettings):
13
13
  robot_data_db_folder_out: str = 'out'
14
14
  robot_data_db_folder_store: str = 'store'
15
15
  robot_data_db_retention_days: float = 60
16
+ robot_task_max_concurrent: int = os.cpu_count() or 1
16
17
  robot_task_retention_days: float = 1
17
18
  robot_cms_host: str = ''
18
19
  robot_cms_auth: str = ''
@@ -26,38 +27,37 @@ class Settings(BaseSettings):
26
27
  )
27
28
 
28
29
  class RuntimeOptions(BaseModel):
29
- def _is_multi_process() -> bool:
30
+ @staticmethod
31
+ def _get_number_of_workers() -> int:
30
32
  """
31
- Checks if the application is running with multiple worker processes.
33
+ Returns the number of worker processes to use for the application.
32
34
 
33
- This function inspects the command-line arguments to determine if the
34
- application is configured to run with more than one worker process. It
35
- looks for the "--workers" argument and checks if the subsequent value
36
- is greater than 1.
35
+ This function inspects the command-line arguments to determine the number
36
+ of worker processes to use. It looks for the "--workers" argument and
37
+ returns the subsequent value as an integer.
37
38
  Sample of command-line arguments:
39
+ fastapi dev main.py --port 6001
38
40
  fastapi run main.py --port 6001 --workers 4
39
41
  uvicorn main:app --port 6001 --workers 4
40
42
 
41
43
  Returns:
42
- bool: True if the application is running with multiple worker
43
- processes, False otherwise.
44
+ Optional[int]: The number of worker processes to use, or 1 if
45
+ the argument is not found or the value is invalid.
44
46
  """
45
- import sys, os
47
+ import sys
46
48
  try:
47
49
  for i, arg in enumerate(sys.argv):
48
50
  if arg == "--workers" and i + 1 < len(sys.argv):
49
- workers = int(sys.argv[i + 1])
50
- if workers > 1:
51
- return True
51
+ return int(sys.argv[i + 1])
52
52
  except (ValueError, IndexError):
53
53
  pass
54
- # Fallback: Compare process and parent process IDs
55
- return False #os.getpid() != os.getppid()
54
+ return 1
56
55
  debug: bool
57
56
  loader_strategy: str
58
57
  loader_show_progress: bool
59
58
  loader_silent_errors: bool
60
- is_multi_process: bool = _is_multi_process()
59
+ number_of_workers: int = _get_number_of_workers()
60
+ is_multi_process: bool = _get_number_of_workers() > 1
61
61
 
62
62
 
63
63
  def runtime_options(self) -> RuntimeOptions:
@@ -60,7 +60,7 @@ class CronManager:
60
60
  Job('cleanup-data',kb_cleanup_data_file, interval=180 * 60),
61
61
  ]
62
62
  def __get_jobstore_strategy() -> JobstoreStrategy:
63
- if config.runtime_options().is_multi_process:
63
+ if True or config.runtime_options().is_multi_process:
64
64
  return MemoryJobstoreStrategy()
65
65
  return PersistentJobstoreStrategy()
66
66
  def __init__(self, strategy: JobstoreStrategy = None, enable_defaults: bool = True):
@@ -101,7 +101,7 @@ class CronManager:
101
101
  trigger=trigger,
102
102
  id=job.name,
103
103
  name=job.name,
104
- replace_existing=False
104
+ replace_existing=True
105
105
  )
106
106
 
107
107
  def start(self):
@@ -114,6 +114,7 @@ class CronManager:
114
114
  if existing_job is None:
115
115
  self.add_job(job)
116
116
 
117
+
117
118
  def get_job(self, job_id: str):
118
119
  return self.scheduler.get_job(job_id)
119
120
 
@@ -1,4 +1,4 @@
1
- from typing import List, Dict, Optional
1
+ from typing import List, Dict, Optional, Union
2
2
  from datetime import datetime
3
3
  from pydantic import AliasChoices, BaseModel, Field, ConfigDict
4
4
  from ws_bom_robot_app.llm.models.kb import LlmKbEndpoint, LlmKbIntegration
@@ -125,7 +125,7 @@ class VectorDbRequest(BaseModel):
125
125
  def api_key(self):
126
126
  return self.secrets.get("openAIApiKey", "")
127
127
  def out_name(self):
128
- return "vector_db_" + datetime.now().strftime("%Y-%m-%d_%H-%M-%S-%f")[:-3]
128
+ return f"db_{datetime.now().strftime("%Y-%m-%d_%H-%M-%S-%f")[:-3]}_{os.getpid()}"
129
129
 
130
130
  class RulesRequest(VectorDbRequest):
131
131
  type: Optional[str] = 'rules'
@@ -144,7 +144,7 @@ async def load_endpoints(endpoints: list[LlmKbEndpoint], destination_directory:
144
144
  documents = await JsonLoader(
145
145
  file_path,
146
146
  meta_fields=[field.name for field in endpoint.fields_mapping.meta_fields] if endpoint.fields_mapping.meta_fields else []
147
- ).load()
147
+ ).aload()
148
148
  _documents.extend(documents)
149
149
  await aiofiles.os.remove(file_path)
150
150
  except Exception as e:
@@ -9,7 +9,7 @@ from ws_bom_robot_app.llm.vector_store.integration.manager import IntegrationMan
9
9
  from ws_bom_robot_app.llm.utils.faiss_helper import FaissHelper
10
10
  from ws_bom_robot_app.util import timer
11
11
 
12
- @timer
12
+ #@timer
13
13
  async def rules(rq: RulesRequest) -> VectorDbResponse:
14
14
  api_key = rq.api_key()
15
15
  _config = rq.config()
@@ -1,3 +1,4 @@
1
+ from typing import Any
1
2
  import aiofiles
2
3
  import aiofiles.os
3
4
  from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy
@@ -6,21 +7,20 @@ from langchain_community.document_transformers import MarkdownifyTransformer as
6
7
  from langchain_core.documents import Document
7
8
  from bs4 import BeautifulSoup, Tag
8
9
 
9
-
10
10
  class Sitemap(IntegrationStrategy):
11
- """_summary_
11
+ """Class to load a sitemap.xml file and extract text from the URLs.
12
12
  Load a sitemap.xml file and extract text from the urls.
13
13
  Args:
14
14
  data (dict[str, str]):
15
15
  data["sitemapUrl"] (str): absolute/relative url of the sitemap.xml
16
16
  data["outputFormat"] (str): ["text", "html", "markdown"] default to "text"
17
- data["filterUrls"] list: list of regex pattern to filter urls ["https://www.example.com/en/products", "^.*products.*$"]
18
- data["includeOnlySelector"] : [".content", "#main-article", "article p"]
19
- data["excludeTag"] (str): default to ["script", "noscript", "style", "head", "header","nav","footer", "iframe"]
20
- data["excludeClass"] (str): ["class1", "class2"]
21
- data["excludeId"] (str): ["id1", "id2"]
17
+ data["filterUrls"] list[str]: list of regex pattern to filter urls ["https://www.example.com/en/products", "^.*products.*$"]
18
+ data["includeOnlySelector"] : list[str] [".content", "#main-article", "article p"]
19
+ data["excludeTag"] (list[str]): default to ["script", "noscript", "style", "head", "header","nav","footer", "iframe"]
20
+ data["excludeClass"] (list[str]): ["class1", "class2"]
21
+ data["excludeId"] (list[str]): ["id1", "id2"]
22
22
  """
23
- def __init__(self, knowledgebase_path: str, data: dict[str, str]):
23
+ def __init__(self, knowledgebase_path: str, data: dict[str, Any]):
24
24
  super().__init__(knowledgebase_path, data)
25
25
  self.__sitemap_url = self.data.get("sitemapUrl")
26
26
  self.__filter_urls: list[str] = self.data.get("filterUrls",[]) # type: ignore
@@ -29,12 +29,12 @@ class Sitemap(IntegrationStrategy):
29
29
  self.__exclude_tag: list[str] = self.data.get("excludeTag",[]) # type: ignore
30
30
  self.__exclude_class: list[str] = self.data.get("excludeClass",[]) # type: ignore
31
31
  self.__exclude_id: list[str] = self.data.get("excludeId",[]) # type: ignore
32
- def working_subdirectory(self) -> str: # type: ignore
32
+ def working_subdirectory(self) -> str:
33
33
  return ""
34
34
  def _extract(self, tag: Tag) -> str:
35
35
  return tag.get_text() if self.__output_format == "text" else tag.prettify()
36
36
  def _output(self, documents: list[Document]) -> list[Document]:
37
- return list(markdownify().transform_documents(documents)) if (self.__output_format == "markdown") else documents
37
+ return list(markdownify().transform_documents(documents)) if self.__output_format == "markdown" else documents
38
38
  def _parse(self,content: BeautifulSoup) -> str:
39
39
  if self.__include_only_selectors:
40
40
  extracted = []
@@ -54,21 +54,25 @@ class Sitemap(IntegrationStrategy):
54
54
  for _ in content.select(element):
55
55
  _.decompose()
56
56
  return str(self._extract(content))
57
- async def load(self) -> list[Document]:
58
- def _is_local(url: str) -> bool:
59
- return not url.startswith("http")
60
- def _remap_if_local(url: str) -> str:
61
- return f"{self.knowledgebase_path}/{url}" if _is_local(url) else url
57
+ def _is_local(self, url: str) -> bool:
58
+ return not url.startswith("http")
62
59
 
60
+ def _remap_if_local(self, url: str) -> str:
61
+ return f"{self.knowledgebase_path}/{url}" if self._is_local(url) else url
62
+
63
+ async def load(self) -> list[Document]:
63
64
  if (self.__sitemap_url):
64
65
  _loader = SitemapLoader(
65
- web_path=_remap_if_local(self.__sitemap_url),
66
+ web_path=self._remap_if_local(self.__sitemap_url),
66
67
  filter_urls=self.__filter_urls,
67
68
  parsing_function=self._parse,
68
- is_local=_is_local(self.__sitemap_url)
69
+ is_local=self._is_local(self.__sitemap_url)
69
70
  )
70
71
  _docs = self._output([document async for document in _loader.alazy_load()])
71
- if _is_local(self.__sitemap_url):
72
- await aiofiles.os.remove(_loader.web_path)
72
+ if self._is_local(self.__sitemap_url):
73
+ try:
74
+ await aiofiles.os.remove(_loader.web_path)
75
+ except FileNotFoundError:
76
+ pass
73
77
  return _docs
74
78
  return []
@@ -109,4 +109,5 @@ class Loader():
109
109
  #@timer
110
110
  async def load(self) -> list[Document]:
111
111
  loaders = MergedDataLoader(self.__directory_loader())
112
- return await asyncio.to_thread(loaders.load)
112
+ return await loaders.aload()
113
+ #return await asyncio.to_thread(loaders.load)
@@ -2,7 +2,6 @@ import json
2
2
  from typing import Optional
3
3
  from langchain_core.documents import Document
4
4
  from langchain_community.document_loaders.base import BaseLoader
5
- import aiofiles
6
5
 
7
6
  class JsonLoader(BaseLoader):
8
7
  def __init__(self, file_path: str, meta_fields:Optional[list[str]] = [],encoding: Optional[str] = "utf-8"):
@@ -10,9 +9,9 @@ class JsonLoader(BaseLoader):
10
9
  self.meta_fields = meta_fields
11
10
  self.encoding = encoding
12
11
 
13
- async def load(self) -> list[Document]:
14
- async with aiofiles.open(self.file_path, "r", encoding=self.encoding) as file:
15
- data = json.loads(await file.read())
12
+ def load(self) -> list[Document]:
13
+ with open(self.file_path, "r", encoding=self.encoding) as file:
14
+ data = json.load(file)
16
15
  _list = data if isinstance(data, list) else [data]
17
16
  return [
18
17
  Document(
@@ -56,6 +56,7 @@ def diag(authenticate: bool = Depends(authenticate)):
56
56
  "version": platform.version(),
57
57
  "type": platform.machine(),
58
58
  "processor": platform.processor(),
59
+ "cpu": os.cpu_count(),
59
60
  "architecture": platform.architecture()
60
61
  },
61
62
  "sys": {
@@ -1,8 +1,10 @@
1
+ from collections import deque
1
2
  import inspect
3
+ from math import floor
2
4
  import asyncio, os
3
5
  from datetime import datetime, timedelta
4
6
  from enum import Enum
5
- from typing import Annotated, Literal, TypeVar, Optional, Dict, Union, Any
7
+ from typing import Annotated, Coroutine, Literal, TypeVar, Optional, Dict, Union, Any
6
8
  from pydantic import BaseModel, ConfigDict, Field, computed_field
7
9
  from uuid import uuid4
8
10
  from fastapi import APIRouter, HTTPException
@@ -11,41 +13,52 @@ from ws_bom_robot_app.llm.models.base import IdentifiableEntity
11
13
  from ws_bom_robot_app.llm.utils.webhooks import WebhookNotifier
12
14
  from ws_bom_robot_app.util import _log
13
15
  from sqlalchemy import create_engine, Column, String, JSON, DateTime, Enum
14
- from sqlalchemy.ext.declarative import declarative_base
15
- from sqlalchemy.orm import sessionmaker
16
+ from sqlalchemy.orm import sessionmaker, registry
16
17
  from abc import ABC, abstractmethod
17
18
 
18
19
  T = TypeVar('T')
19
20
 
20
21
  #region models
21
- class TaskStatistics(BaseModel):
22
- class TaskStatisticExecutionTime(BaseModel):
23
- min: str
24
- max: str
25
- avg: str
26
- retention_days: float = config.robot_task_retention_days
27
- total: int
28
- pending: int
29
- completed: int
30
- failure: int
31
- exec_time: TaskStatisticExecutionTime
32
-
33
22
  class TaskHeader(BaseModel):
23
+ """
24
+ TaskHeader model representing the header information for a task.
25
+ Example:
26
+ ```bash
27
+ curl -X POST "http://localhost:6001/api/llm/kb/task"
28
+ -H "x-ws-bom-msg-id: 1234"
29
+ -H "x-ws-bom-msg-type: generate.knowledgebase"
30
+ -H "x-ws-bom-msg-extra: key1=value1,key2=value2"
31
+ -H "x-ws-bom-webhooks: http://localhost:8000/api/webhook"
32
+ -d "{\"api_key\":\"string\"}"
33
+ ```
34
+ Attributes:
35
+ x_ws_bom_msg_id (Optional[str]): The message ID for the task. If not provided, a UUID will be generated.
36
+ x_ws_bom_msg_type (Optional[str]): The message type for the task, e.g. "send.email" or "generate.knowledgebase".
37
+ x_ws_bom_msg_extra (Optional[str]): Any extra information for the task, in comma separated key=value pairs. e.g. "key1=value1,key2=value2".
38
+ x_ws_bom_webhooks (Optional[str]): Webhooks associated with the task, called when the task is completed or failed.
39
+ """
40
+ x_ws_bom_msg_id: Optional[str] = None
34
41
  x_ws_bom_msg_type: Optional[str] = None
42
+ x_ws_bom_msg_extra: Optional[str] = None
35
43
  x_ws_bom_webhooks: Optional[str] = None
36
44
  model_config = ConfigDict(
37
45
  extra='allow'
38
46
  )
39
47
 
40
48
  class TaskMetaData(BaseModel):
41
- start_time: str
42
- end_time: Optional[str] = None
49
+ created_at: str
50
+ start_at: Optional[str] = None
51
+ end_at: Optional[str] = None
43
52
  @computed_field
44
53
  @property
45
54
  def elapsed_time(self) -> Union[str, None]:
46
- return str((datetime.now() if not self.end_time else datetime.fromisoformat(self.end_time)) - datetime.fromisoformat(self.start_time))
55
+ return str(
56
+ (datetime.now() if not self.end_at else datetime.fromisoformat(self.end_at))
57
+ - datetime.fromisoformat(self.created_at if not self.start_at else self.start_at)
58
+ )
47
59
  source: Optional[str] = None
48
60
  pid: Optional[int] = None
61
+ extra: Optional[dict[str,str]] = None
49
62
 
50
63
  class TaskStatus(IdentifiableEntity):
51
64
  type: Optional[str] = None
@@ -59,6 +72,7 @@ class TaskStatus(IdentifiableEntity):
59
72
 
60
73
  class TaskEntry(IdentifiableEntity):
61
74
  task: Annotated[asyncio.Task, Field(default=None, validate_default=False)] = None
75
+ coroutine: Coroutine = None
62
76
  headers: TaskHeader | None = None
63
77
  status: Union[TaskStatus, None] = None
64
78
  def _get_coroutine_name(self, coroutine: asyncio.coroutines) -> str:
@@ -66,6 +80,15 @@ class TaskEntry(IdentifiableEntity):
66
80
  return coroutine.cr_code.co_name
67
81
  return "<unknown>"
68
82
  def __init__(self, **data):
83
+ def _metadata_extra(data: str) -> dict[str,str] | None:
84
+ if data:
85
+ _values = data.split(",")
86
+ if _values:
87
+ try:
88
+ return {k: v for k,v in [val.split("=") for val in _values]}
89
+ except Exception as e:
90
+ return None
91
+ return None
69
92
  #separate task from data to handle asyncio.Task
70
93
  task = data.pop('task',None)
71
94
  super().__init__(**data)
@@ -75,22 +98,47 @@ class TaskEntry(IdentifiableEntity):
75
98
  if not self.status:
76
99
  self.status = TaskStatus(
77
100
  id=self.id,
78
- type=self.headers.x_ws_bom_msg_type if self.headers and self.headers.x_ws_bom_msg_type else self._get_coroutine_name(task._coro) if task else None,
101
+ type=self.headers.x_ws_bom_msg_type if self.headers and self.headers.x_ws_bom_msg_type else self._get_coroutine_name(self.coroutine) if self.coroutine else None,
79
102
  status="pending",
80
103
  metadata=TaskMetaData(
81
- start_time=str(datetime.now().isoformat()),
82
- source=self._get_coroutine_name(task._coro) if task else None,
83
- pid=os.getpid())
104
+ created_at=str(datetime.now().isoformat()),
105
+ source=self._get_coroutine_name(self.coroutine) if self.coroutine else None,
106
+ pid=os.getpid(),
107
+ extra=_metadata_extra(self.headers.x_ws_bom_msg_extra) if self.headers and self.headers.x_ws_bom_msg_extra else None
108
+ )
84
109
  )
85
110
  model_config = ConfigDict(
86
111
  arbitrary_types_allowed=True,
87
112
  validate_assignment=True
88
113
  )
89
114
 
115
+ class TaskStatistics(BaseModel):
116
+ class TaskStatisticExecutionInfo(BaseModel):
117
+ retention_days: float = config.robot_task_retention_days
118
+ max_concurrent: int
119
+ running: list[TaskStatus]
120
+ slowest: list
121
+ class TaskStatisticExecutionTime(BaseModel):
122
+ min: str
123
+ max: str
124
+ avg: str
125
+ total: int
126
+ pending: int
127
+ completed: int
128
+ failure: int
129
+ exec_time: TaskStatisticExecutionTime
130
+ exec_info: TaskStatisticExecutionInfo
131
+
90
132
  #endregion
91
133
 
92
134
  #region interface
93
135
  class TaskManagerStrategy(ABC):
136
+ def __init__(self, max_concurrent_tasks: int = floor(2 * config.robot_task_max_concurrent / config.runtime_options().number_of_workers)):
137
+ self.max_concurrent_tasks = max_concurrent_tasks
138
+ self.semaphore = asyncio.Semaphore(self.max_concurrent_tasks)
139
+ self.running_tasks = dict[str, TaskEntry]()
140
+ self.loop = asyncio.get_event_loop()
141
+
94
142
  @abstractmethod
95
143
  def create_task(self, coroutine, headers: TaskHeader | None = None) -> IdentifiableEntity:
96
144
  pass
@@ -105,7 +153,7 @@ class TaskManagerStrategy(ABC):
105
153
  pass
106
154
 
107
155
  @abstractmethod
108
- def get_tasks(self) -> list[TaskStatus] | None:
156
+ def get_tasks(self) -> list[TaskStatus]:
109
157
  pass
110
158
 
111
159
  @abstractmethod
@@ -121,7 +169,7 @@ class TaskManagerStrategy(ABC):
121
169
  pass
122
170
 
123
171
  def task_cleanup_rule(self, task: TaskEntry) -> bool:
124
- return task.status.status in {"completed", "failure"} and datetime.fromisoformat(task.status.metadata.end_time) < datetime.now() - timedelta(days=config.robot_task_retention_days)
172
+ return task.status.status in {"completed", "failure"} and datetime.fromisoformat(task.status.metadata.end_at) < datetime.now() - timedelta(days=config.robot_task_retention_days)
125
173
 
126
174
  def task_done_callback(self, task_entry: TaskEntry, headers: TaskHeader | None = None) -> callable:
127
175
  def callback(task: asyncio.Task):
@@ -133,7 +181,7 @@ class TaskManagerStrategy(ABC):
133
181
  task_entry.status.status = "failure"
134
182
  task_entry.status.error = str(e)
135
183
  finally:
136
- task_entry.status.metadata.end_time = str(datetime.now().isoformat())
184
+ task_entry.status.metadata.end_at = str(datetime.now().isoformat())
137
185
  #strategy-specific behavior
138
186
  self.update_task_status(task_entry)
139
187
  #notify webhooks
@@ -142,14 +190,32 @@ class TaskManagerStrategy(ABC):
142
190
  WebhookNotifier().notify_webhook(task_entry.status,headers.x_ws_bom_webhooks)
143
191
  )
144
192
  return callback
193
+
145
194
  def create_task_entry(self, coroutine: asyncio.coroutines, headers: TaskHeader | None = None) -> TaskEntry:
195
+ _id = headers and headers.x_ws_bom_msg_id or str(uuid4())
146
196
  task = TaskEntry(
147
- id=str(uuid4()),
148
- task=asyncio.create_task(coroutine),
197
+ id=_id,
198
+ coroutine=coroutine,
149
199
  headers=headers)
150
- task.task.add_done_callback(self.task_done_callback(task, headers))
200
+ self.loop.create_task(self._run_task_with_semaphore(task)) # run the task
151
201
  return task
152
202
 
203
+ async def _run_task_with_semaphore(self, task_entry: TaskEntry):
204
+ """Run a task with semaphore control to limit concurrency."""
205
+ async with self.semaphore:
206
+ self.running_tasks[task_entry.id]=task_entry
207
+ await self._execute_task(task_entry)
208
+ del self.running_tasks[task_entry.id]
209
+
210
+ async def _execute_task(self, task_entry: TaskEntry):
211
+ """Execute a task and handle its lifecycle."""
212
+ task_entry.status.metadata.start_at = str(datetime.now().isoformat())
213
+ task_entry.task = asyncio.create_task(task_entry.coroutine)
214
+ task_entry.task.add_done_callback(self.task_done_callback(task_entry, task_entry.headers))
215
+ await task_entry.task
216
+
217
+ def running_task(self):
218
+ return self.running_tasks.values()
153
219
  def stats(self) -> TaskStatistics:
154
220
  def __string_to_timedelta(value: str) -> timedelta:
155
221
  if "." in value:
@@ -163,12 +229,13 @@ class TaskManagerStrategy(ABC):
163
229
  minutes, seconds = divmod(remainder, 60)
164
230
  return f"{int(hours):02d}:{int(minutes):02d}:{int(seconds):02d}.{td.microseconds}"
165
231
  _all = self.get_tasks()
166
- _not_pending = _all and [task for task in _all if task.status != "pending"]
167
- _total_not_pending = len(_not_pending)
168
- elapsed_times = [__string_to_timedelta(task.metadata.elapsed_time) for task in _not_pending]
169
- _avg_exec_time = sum(elapsed_times, timedelta()) / _total_not_pending if _total_not_pending > 0 else timedelta()
170
- _min_exec_time = min(elapsed_times) if _total_not_pending > 0 else timedelta()
171
- _max_exec_time = max(elapsed_times) if _total_not_pending > 0 else timedelta()
232
+ _not_pending = _all and [task for task in _all if task.status != "pending"] or []
233
+ _total_not_pending = _not_pending and len(_not_pending) if _not_pending else 0
234
+ elapsed_times = _not_pending and [__string_to_timedelta(task.metadata.elapsed_time) for task in _not_pending]
235
+ _avg_exec_time = sum(elapsed_times, timedelta()) / _total_not_pending if elapsed_times and _total_not_pending > 0 else timedelta()
236
+ _min_exec_time = min(elapsed_times) if elapsed_times and _total_not_pending > 0 else timedelta()
237
+ _max_exec_time = max(elapsed_times) if elapsed_times and _total_not_pending > 0 else timedelta()
238
+ _slowest: list[TaskStatus] = _not_pending and sorted(_not_pending, key=lambda x: __string_to_timedelta(x.metadata.elapsed_time), reverse=True)[:3]
172
239
  return TaskStatistics(
173
240
  total= _all and len(_all) or 0,
174
241
  pending=_all and len([task for task in _all if task.status == "pending"]) or 0,
@@ -178,6 +245,12 @@ class TaskManagerStrategy(ABC):
178
245
  min=__timedelta_to_string(_min_exec_time),
179
246
  max=__timedelta_to_string(_max_exec_time),
180
247
  avg=__timedelta_to_string(_avg_exec_time)
248
+ ),
249
+ exec_info=TaskStatistics.TaskStatisticExecutionInfo(
250
+ retention_days=config.robot_task_retention_days,
251
+ max_concurrent=self.max_concurrent_tasks,
252
+ running=[task.status for task in self.running_task()],
253
+ slowest=_slowest
181
254
  )
182
255
  )
183
256
 
@@ -186,6 +259,7 @@ class TaskManagerStrategy(ABC):
186
259
  #memory implementation
187
260
  class MemoryTaskManagerStrategy(TaskManagerStrategy):
188
261
  def __init__(self):
262
+ super().__init__()
189
263
  self.tasks: Dict[str, TaskEntry] = {}
190
264
 
191
265
  def create_task(self, coroutine: asyncio.coroutines, headers: TaskHeader | None = None) -> IdentifiableEntity:
@@ -199,7 +273,7 @@ class MemoryTaskManagerStrategy(TaskManagerStrategy):
199
273
 
200
274
  def get_task(self, id: str) -> TaskStatus | None:
201
275
  if _task := self.tasks.get(id):
202
- return _task
276
+ return _task.status
203
277
  return None
204
278
 
205
279
  def get_tasks(self) -> list[TaskStatus] | None:
@@ -217,7 +291,7 @@ class MemoryTaskManagerStrategy(TaskManagerStrategy):
217
291
  #endregion
218
292
 
219
293
  #db implementation
220
- Base = declarative_base()
294
+ Base = registry().generate_base()
221
295
  class TaskEntryModel(Base):
222
296
  __tablename__ = "entry"
223
297
  id = Column(String, primary_key=True)
@@ -227,6 +301,7 @@ class TaskEntryModel(Base):
227
301
  )
228
302
  class DatabaseTaskManagerStrategy(TaskManagerStrategy):
229
303
  def __init__(self, db_url: str = "sqlite:///.data/db/tasks.sqlite"):
304
+ super().__init__()
230
305
  self.engine = create_engine(db_url)
231
306
  self.Session = sessionmaker(bind=self.engine)
232
307
  Base.metadata.create_all(self.engine)
@@ -249,15 +324,15 @@ class DatabaseTaskManagerStrategy(TaskManagerStrategy):
249
324
  with self.Session() as session:
250
325
  task = session.query(TaskEntryModel).filter_by(id=id).first()
251
326
  if task:
252
- return TaskEntry(**task.__dict__)
327
+ return TaskEntry(**task.__dict__).status
253
328
  return None
254
329
 
255
- def get_tasks(self) -> list[TaskStatus] | None:
330
+ def get_tasks(self) -> list[TaskStatus]:
256
331
  with self.Session() as session:
257
332
  tasks = session.query(TaskEntryModel).all()
258
333
  if tasks:
259
334
  return [TaskEntry(**task.__dict__).status for task in tasks]
260
- return None
335
+ return []
261
336
 
262
337
  def remove_task(self, id: str) -> None:
263
338
  with self.Session() as session:
@@ -286,10 +361,10 @@ router = APIRouter(prefix="/api/task", tags=["task"])
286
361
 
287
362
  @router.get("/status/{id}")
288
363
  async def _status_task(id: str) -> TaskStatus:
289
- task = task_manager.get_task(id)
290
- if not task:
364
+ task_status = task_manager.get_task(id)
365
+ if not task_status:
291
366
  raise HTTPException(status_code=404, detail="Task not found")
292
- return task.status
367
+ return task_status
293
368
 
294
369
  @router.get("/status")
295
370
  async def _status_task_list() -> list[TaskStatus]:
@@ -308,4 +383,5 @@ async def _remove_task_list():
308
383
  @router.get("/stats")
309
384
  async def _stats() -> TaskStatistics:
310
385
  return task_manager.stats()
386
+
311
387
  #endregion
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ws_bom_robot_app
3
- Version: 0.0.11
3
+ Version: 0.0.12
4
4
  Summary: A FastAPI application serving ws bom/robot/llm platform ai.
5
5
  Home-page: https://github.com/websolutespa/bom
6
6
  Author: Websolute Spa