ws-bom-robot-app 0.0.11__tar.gz → 0.0.12__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/PKG-INFO +1 -1
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/setup.py +1 -1
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/config.py +16 -16
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/cron_manager.py +3 -2
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/models/api.py +2 -2
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/models/kb.py +1 -1
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/vector_store/generator.py +1 -1
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/vector_store/integration/sitemap.py +23 -19
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/vector_store/loader/base.py +2 -1
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/vector_store/loader/json_loader.py +3 -4
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/main.py +1 -0
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/task_manager.py +118 -42
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app.egg-info/PKG-INFO +1 -1
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/README.md +0 -0
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/pyproject.toml +0 -0
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/setup.cfg +0 -0
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/__init__.py +0 -0
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/auth.py +0 -0
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/__init__.py +0 -0
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/agent_description.py +0 -0
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/agent_handler.py +0 -0
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/agent_lcel.py +0 -0
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/api.py +0 -0
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/defaut_prompt.py +0 -0
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/main.py +0 -0
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/models/__init__.py +0 -0
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/models/base.py +0 -0
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/settings.py +0 -0
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/tools/__init__.py +0 -0
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/tools/models/__init__.py +0 -0
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/tools/models/main.py +0 -0
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/tools/tool_builder.py +0 -0
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/tools/tool_manager.py +0 -0
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/tools/utils.py +0 -0
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/utils/__init__.py +0 -0
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/utils/agent_utils.py +0 -0
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/utils/download.py +0 -0
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/utils/faiss_helper.py +0 -0
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/utils/kb.py +0 -0
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/utils/print.py +0 -0
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/utils/webhooks.py +0 -0
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/vector_store/__init__.py +0 -0
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/vector_store/integration/__init__.py +0 -0
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/vector_store/integration/base.py +0 -0
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/vector_store/integration/manager.py +0 -0
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/vector_store/loader/__init__.py +0 -0
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/requirements.txt +0 -0
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/util.py +0 -0
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app.egg-info/SOURCES.txt +0 -0
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app.egg-info/dependency_links.txt +0 -0
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app.egg-info/requires.txt +0 -0
- {ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app.egg-info/top_level.txt +0 -0
|
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
|
|
|
2
2
|
|
|
3
3
|
setup(
|
|
4
4
|
name="ws_bom_robot_app",
|
|
5
|
-
version="0.0.
|
|
5
|
+
version="0.0.12",
|
|
6
6
|
description="A FastAPI application serving ws bom/robot/llm platform ai.",
|
|
7
7
|
long_description=open("README.md", encoding='utf-8').read(),
|
|
8
8
|
long_description_content_type="text/markdown",
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from typing import Optional
|
|
2
2
|
from pydantic import BaseModel, ConfigDict
|
|
3
3
|
from pydantic_settings import BaseSettings
|
|
4
|
-
|
|
4
|
+
import os
|
|
5
5
|
|
|
6
6
|
class Settings(BaseSettings):
|
|
7
7
|
robot_env: str = 'local'
|
|
@@ -13,6 +13,7 @@ class Settings(BaseSettings):
|
|
|
13
13
|
robot_data_db_folder_out: str = 'out'
|
|
14
14
|
robot_data_db_folder_store: str = 'store'
|
|
15
15
|
robot_data_db_retention_days: float = 60
|
|
16
|
+
robot_task_max_concurrent: int = os.cpu_count() or 1
|
|
16
17
|
robot_task_retention_days: float = 1
|
|
17
18
|
robot_cms_host: str = ''
|
|
18
19
|
robot_cms_auth: str = ''
|
|
@@ -26,38 +27,37 @@ class Settings(BaseSettings):
|
|
|
26
27
|
)
|
|
27
28
|
|
|
28
29
|
class RuntimeOptions(BaseModel):
|
|
29
|
-
|
|
30
|
+
@staticmethod
|
|
31
|
+
def _get_number_of_workers() -> int:
|
|
30
32
|
"""
|
|
31
|
-
|
|
33
|
+
Returns the number of worker processes to use for the application.
|
|
32
34
|
|
|
33
|
-
This function inspects the command-line arguments to determine
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
is greater than 1.
|
|
35
|
+
This function inspects the command-line arguments to determine the number
|
|
36
|
+
of worker processes to use. It looks for the "--workers" argument and
|
|
37
|
+
returns the subsequent value as an integer.
|
|
37
38
|
Sample of command-line arguments:
|
|
39
|
+
fastapi dev main.py --port 6001
|
|
38
40
|
fastapi run main.py --port 6001 --workers 4
|
|
39
41
|
uvicorn main:app --port 6001 --workers 4
|
|
40
42
|
|
|
41
43
|
Returns:
|
|
42
|
-
|
|
43
|
-
|
|
44
|
+
Optional[int]: The number of worker processes to use, or 1 if
|
|
45
|
+
the argument is not found or the value is invalid.
|
|
44
46
|
"""
|
|
45
|
-
import sys
|
|
47
|
+
import sys
|
|
46
48
|
try:
|
|
47
49
|
for i, arg in enumerate(sys.argv):
|
|
48
50
|
if arg == "--workers" and i + 1 < len(sys.argv):
|
|
49
|
-
|
|
50
|
-
if workers > 1:
|
|
51
|
-
return True
|
|
51
|
+
return int(sys.argv[i + 1])
|
|
52
52
|
except (ValueError, IndexError):
|
|
53
53
|
pass
|
|
54
|
-
|
|
55
|
-
return False #os.getpid() != os.getppid()
|
|
54
|
+
return 1
|
|
56
55
|
debug: bool
|
|
57
56
|
loader_strategy: str
|
|
58
57
|
loader_show_progress: bool
|
|
59
58
|
loader_silent_errors: bool
|
|
60
|
-
|
|
59
|
+
number_of_workers: int = _get_number_of_workers()
|
|
60
|
+
is_multi_process: bool = _get_number_of_workers() > 1
|
|
61
61
|
|
|
62
62
|
|
|
63
63
|
def runtime_options(self) -> RuntimeOptions:
|
|
@@ -60,7 +60,7 @@ class CronManager:
|
|
|
60
60
|
Job('cleanup-data',kb_cleanup_data_file, interval=180 * 60),
|
|
61
61
|
]
|
|
62
62
|
def __get_jobstore_strategy() -> JobstoreStrategy:
|
|
63
|
-
if config.runtime_options().is_multi_process:
|
|
63
|
+
if True or config.runtime_options().is_multi_process:
|
|
64
64
|
return MemoryJobstoreStrategy()
|
|
65
65
|
return PersistentJobstoreStrategy()
|
|
66
66
|
def __init__(self, strategy: JobstoreStrategy = None, enable_defaults: bool = True):
|
|
@@ -101,7 +101,7 @@ class CronManager:
|
|
|
101
101
|
trigger=trigger,
|
|
102
102
|
id=job.name,
|
|
103
103
|
name=job.name,
|
|
104
|
-
replace_existing=
|
|
104
|
+
replace_existing=True
|
|
105
105
|
)
|
|
106
106
|
|
|
107
107
|
def start(self):
|
|
@@ -114,6 +114,7 @@ class CronManager:
|
|
|
114
114
|
if existing_job is None:
|
|
115
115
|
self.add_job(job)
|
|
116
116
|
|
|
117
|
+
|
|
117
118
|
def get_job(self, job_id: str):
|
|
118
119
|
return self.scheduler.get_job(job_id)
|
|
119
120
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import List, Dict, Optional
|
|
1
|
+
from typing import List, Dict, Optional, Union
|
|
2
2
|
from datetime import datetime
|
|
3
3
|
from pydantic import AliasChoices, BaseModel, Field, ConfigDict
|
|
4
4
|
from ws_bom_robot_app.llm.models.kb import LlmKbEndpoint, LlmKbIntegration
|
|
@@ -125,7 +125,7 @@ class VectorDbRequest(BaseModel):
|
|
|
125
125
|
def api_key(self):
|
|
126
126
|
return self.secrets.get("openAIApiKey", "")
|
|
127
127
|
def out_name(self):
|
|
128
|
-
return "
|
|
128
|
+
return f"db_{datetime.now().strftime("%Y-%m-%d_%H-%M-%S-%f")[:-3]}_{os.getpid()}"
|
|
129
129
|
|
|
130
130
|
class RulesRequest(VectorDbRequest):
|
|
131
131
|
type: Optional[str] = 'rules'
|
|
@@ -144,7 +144,7 @@ async def load_endpoints(endpoints: list[LlmKbEndpoint], destination_directory:
|
|
|
144
144
|
documents = await JsonLoader(
|
|
145
145
|
file_path,
|
|
146
146
|
meta_fields=[field.name for field in endpoint.fields_mapping.meta_fields] if endpoint.fields_mapping.meta_fields else []
|
|
147
|
-
).
|
|
147
|
+
).aload()
|
|
148
148
|
_documents.extend(documents)
|
|
149
149
|
await aiofiles.os.remove(file_path)
|
|
150
150
|
except Exception as e:
|
{ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/vector_store/generator.py
RENAMED
|
@@ -9,7 +9,7 @@ from ws_bom_robot_app.llm.vector_store.integration.manager import IntegrationMan
|
|
|
9
9
|
from ws_bom_robot_app.llm.utils.faiss_helper import FaissHelper
|
|
10
10
|
from ws_bom_robot_app.util import timer
|
|
11
11
|
|
|
12
|
-
|
|
12
|
+
#@timer
|
|
13
13
|
async def rules(rq: RulesRequest) -> VectorDbResponse:
|
|
14
14
|
api_key = rq.api_key()
|
|
15
15
|
_config = rq.config()
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
from typing import Any
|
|
1
2
|
import aiofiles
|
|
2
3
|
import aiofiles.os
|
|
3
4
|
from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy
|
|
@@ -6,21 +7,20 @@ from langchain_community.document_transformers import MarkdownifyTransformer as
|
|
|
6
7
|
from langchain_core.documents import Document
|
|
7
8
|
from bs4 import BeautifulSoup, Tag
|
|
8
9
|
|
|
9
|
-
|
|
10
10
|
class Sitemap(IntegrationStrategy):
|
|
11
|
-
"""
|
|
11
|
+
"""Class to load a sitemap.xml file and extract text from the URLs.
|
|
12
12
|
Load a sitemap.xml file and extract text from the urls.
|
|
13
13
|
Args:
|
|
14
14
|
data (dict[str, str]):
|
|
15
15
|
data["sitemapUrl"] (str): absolute/relative url of the sitemap.xml
|
|
16
16
|
data["outputFormat"] (str): ["text", "html", "markdown"] default to "text"
|
|
17
|
-
data["filterUrls"] list: list of regex pattern to filter urls ["https://www.example.com/en/products", "^.*products.*$"]
|
|
18
|
-
data["includeOnlySelector"] : [".content", "#main-article", "article p"]
|
|
19
|
-
data["excludeTag"] (str): default to ["script", "noscript", "style", "head", "header","nav","footer", "iframe"]
|
|
20
|
-
data["excludeClass"] (str): ["class1", "class2"]
|
|
21
|
-
data["excludeId"] (str): ["id1", "id2"]
|
|
17
|
+
data["filterUrls"] list[str]: list of regex pattern to filter urls ["https://www.example.com/en/products", "^.*products.*$"]
|
|
18
|
+
data["includeOnlySelector"] : list[str] [".content", "#main-article", "article p"]
|
|
19
|
+
data["excludeTag"] (list[str]): default to ["script", "noscript", "style", "head", "header","nav","footer", "iframe"]
|
|
20
|
+
data["excludeClass"] (list[str]): ["class1", "class2"]
|
|
21
|
+
data["excludeId"] (list[str]): ["id1", "id2"]
|
|
22
22
|
"""
|
|
23
|
-
def __init__(self, knowledgebase_path: str, data: dict[str,
|
|
23
|
+
def __init__(self, knowledgebase_path: str, data: dict[str, Any]):
|
|
24
24
|
super().__init__(knowledgebase_path, data)
|
|
25
25
|
self.__sitemap_url = self.data.get("sitemapUrl")
|
|
26
26
|
self.__filter_urls: list[str] = self.data.get("filterUrls",[]) # type: ignore
|
|
@@ -29,12 +29,12 @@ class Sitemap(IntegrationStrategy):
|
|
|
29
29
|
self.__exclude_tag: list[str] = self.data.get("excludeTag",[]) # type: ignore
|
|
30
30
|
self.__exclude_class: list[str] = self.data.get("excludeClass",[]) # type: ignore
|
|
31
31
|
self.__exclude_id: list[str] = self.data.get("excludeId",[]) # type: ignore
|
|
32
|
-
def working_subdirectory(self) -> str:
|
|
32
|
+
def working_subdirectory(self) -> str:
|
|
33
33
|
return ""
|
|
34
34
|
def _extract(self, tag: Tag) -> str:
|
|
35
35
|
return tag.get_text() if self.__output_format == "text" else tag.prettify()
|
|
36
36
|
def _output(self, documents: list[Document]) -> list[Document]:
|
|
37
|
-
return list(markdownify().transform_documents(documents)) if
|
|
37
|
+
return list(markdownify().transform_documents(documents)) if self.__output_format == "markdown" else documents
|
|
38
38
|
def _parse(self,content: BeautifulSoup) -> str:
|
|
39
39
|
if self.__include_only_selectors:
|
|
40
40
|
extracted = []
|
|
@@ -54,21 +54,25 @@ class Sitemap(IntegrationStrategy):
|
|
|
54
54
|
for _ in content.select(element):
|
|
55
55
|
_.decompose()
|
|
56
56
|
return str(self._extract(content))
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
return not url.startswith("http")
|
|
60
|
-
def _remap_if_local(url: str) -> str:
|
|
61
|
-
return f"{self.knowledgebase_path}/{url}" if _is_local(url) else url
|
|
57
|
+
def _is_local(self, url: str) -> bool:
|
|
58
|
+
return not url.startswith("http")
|
|
62
59
|
|
|
60
|
+
def _remap_if_local(self, url: str) -> str:
|
|
61
|
+
return f"{self.knowledgebase_path}/{url}" if self._is_local(url) else url
|
|
62
|
+
|
|
63
|
+
async def load(self) -> list[Document]:
|
|
63
64
|
if (self.__sitemap_url):
|
|
64
65
|
_loader = SitemapLoader(
|
|
65
|
-
web_path=_remap_if_local(self.__sitemap_url),
|
|
66
|
+
web_path=self._remap_if_local(self.__sitemap_url),
|
|
66
67
|
filter_urls=self.__filter_urls,
|
|
67
68
|
parsing_function=self._parse,
|
|
68
|
-
is_local=_is_local(self.__sitemap_url)
|
|
69
|
+
is_local=self._is_local(self.__sitemap_url)
|
|
69
70
|
)
|
|
70
71
|
_docs = self._output([document async for document in _loader.alazy_load()])
|
|
71
|
-
if _is_local(self.__sitemap_url):
|
|
72
|
-
|
|
72
|
+
if self._is_local(self.__sitemap_url):
|
|
73
|
+
try:
|
|
74
|
+
await aiofiles.os.remove(_loader.web_path)
|
|
75
|
+
except FileNotFoundError:
|
|
76
|
+
pass
|
|
73
77
|
return _docs
|
|
74
78
|
return []
|
|
@@ -2,7 +2,6 @@ import json
|
|
|
2
2
|
from typing import Optional
|
|
3
3
|
from langchain_core.documents import Document
|
|
4
4
|
from langchain_community.document_loaders.base import BaseLoader
|
|
5
|
-
import aiofiles
|
|
6
5
|
|
|
7
6
|
class JsonLoader(BaseLoader):
|
|
8
7
|
def __init__(self, file_path: str, meta_fields:Optional[list[str]] = [],encoding: Optional[str] = "utf-8"):
|
|
@@ -10,9 +9,9 @@ class JsonLoader(BaseLoader):
|
|
|
10
9
|
self.meta_fields = meta_fields
|
|
11
10
|
self.encoding = encoding
|
|
12
11
|
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
data = json.
|
|
12
|
+
def load(self) -> list[Document]:
|
|
13
|
+
with open(self.file_path, "r", encoding=self.encoding) as file:
|
|
14
|
+
data = json.load(file)
|
|
16
15
|
_list = data if isinstance(data, list) else [data]
|
|
17
16
|
return [
|
|
18
17
|
Document(
|
|
@@ -1,8 +1,10 @@
|
|
|
1
|
+
from collections import deque
|
|
1
2
|
import inspect
|
|
3
|
+
from math import floor
|
|
2
4
|
import asyncio, os
|
|
3
5
|
from datetime import datetime, timedelta
|
|
4
6
|
from enum import Enum
|
|
5
|
-
from typing import Annotated, Literal, TypeVar, Optional, Dict, Union, Any
|
|
7
|
+
from typing import Annotated, Coroutine, Literal, TypeVar, Optional, Dict, Union, Any
|
|
6
8
|
from pydantic import BaseModel, ConfigDict, Field, computed_field
|
|
7
9
|
from uuid import uuid4
|
|
8
10
|
from fastapi import APIRouter, HTTPException
|
|
@@ -11,41 +13,52 @@ from ws_bom_robot_app.llm.models.base import IdentifiableEntity
|
|
|
11
13
|
from ws_bom_robot_app.llm.utils.webhooks import WebhookNotifier
|
|
12
14
|
from ws_bom_robot_app.util import _log
|
|
13
15
|
from sqlalchemy import create_engine, Column, String, JSON, DateTime, Enum
|
|
14
|
-
from sqlalchemy.
|
|
15
|
-
from sqlalchemy.orm import sessionmaker
|
|
16
|
+
from sqlalchemy.orm import sessionmaker, registry
|
|
16
17
|
from abc import ABC, abstractmethod
|
|
17
18
|
|
|
18
19
|
T = TypeVar('T')
|
|
19
20
|
|
|
20
21
|
#region models
|
|
21
|
-
class TaskStatistics(BaseModel):
|
|
22
|
-
class TaskStatisticExecutionTime(BaseModel):
|
|
23
|
-
min: str
|
|
24
|
-
max: str
|
|
25
|
-
avg: str
|
|
26
|
-
retention_days: float = config.robot_task_retention_days
|
|
27
|
-
total: int
|
|
28
|
-
pending: int
|
|
29
|
-
completed: int
|
|
30
|
-
failure: int
|
|
31
|
-
exec_time: TaskStatisticExecutionTime
|
|
32
|
-
|
|
33
22
|
class TaskHeader(BaseModel):
|
|
23
|
+
"""
|
|
24
|
+
TaskHeader model representing the header information for a task.
|
|
25
|
+
Example:
|
|
26
|
+
```bash
|
|
27
|
+
curl -X POST "http://localhost:6001/api/llm/kb/task"
|
|
28
|
+
-H "x-ws-bom-msg-id: 1234"
|
|
29
|
+
-H "x-ws-bom-msg-type: generate.knowledgebase"
|
|
30
|
+
-H "x-ws-bom-msg-extra: key1=value1,key2=value2"
|
|
31
|
+
-H "x-ws-bom-webhooks: http://localhost:8000/api/webhook"
|
|
32
|
+
-d "{\"api_key\":\"string\"}"
|
|
33
|
+
```
|
|
34
|
+
Attributes:
|
|
35
|
+
x_ws_bom_msg_id (Optional[str]): The message ID for the task. If not provided, a UUID will be generated.
|
|
36
|
+
x_ws_bom_msg_type (Optional[str]): The message type for the task, e.g. "send.email" or "generate.knowledgebase".
|
|
37
|
+
x_ws_bom_msg_extra (Optional[str]): Any extra information for the task, in comma separated key=value pairs. e.g. "key1=value1,key2=value2".
|
|
38
|
+
x_ws_bom_webhooks (Optional[str]): Webhooks associated with the task, called when the task is completed or failed.
|
|
39
|
+
"""
|
|
40
|
+
x_ws_bom_msg_id: Optional[str] = None
|
|
34
41
|
x_ws_bom_msg_type: Optional[str] = None
|
|
42
|
+
x_ws_bom_msg_extra: Optional[str] = None
|
|
35
43
|
x_ws_bom_webhooks: Optional[str] = None
|
|
36
44
|
model_config = ConfigDict(
|
|
37
45
|
extra='allow'
|
|
38
46
|
)
|
|
39
47
|
|
|
40
48
|
class TaskMetaData(BaseModel):
|
|
41
|
-
|
|
42
|
-
|
|
49
|
+
created_at: str
|
|
50
|
+
start_at: Optional[str] = None
|
|
51
|
+
end_at: Optional[str] = None
|
|
43
52
|
@computed_field
|
|
44
53
|
@property
|
|
45
54
|
def elapsed_time(self) -> Union[str, None]:
|
|
46
|
-
return str(
|
|
55
|
+
return str(
|
|
56
|
+
(datetime.now() if not self.end_at else datetime.fromisoformat(self.end_at))
|
|
57
|
+
- datetime.fromisoformat(self.created_at if not self.start_at else self.start_at)
|
|
58
|
+
)
|
|
47
59
|
source: Optional[str] = None
|
|
48
60
|
pid: Optional[int] = None
|
|
61
|
+
extra: Optional[dict[str,str]] = None
|
|
49
62
|
|
|
50
63
|
class TaskStatus(IdentifiableEntity):
|
|
51
64
|
type: Optional[str] = None
|
|
@@ -59,6 +72,7 @@ class TaskStatus(IdentifiableEntity):
|
|
|
59
72
|
|
|
60
73
|
class TaskEntry(IdentifiableEntity):
|
|
61
74
|
task: Annotated[asyncio.Task, Field(default=None, validate_default=False)] = None
|
|
75
|
+
coroutine: Coroutine = None
|
|
62
76
|
headers: TaskHeader | None = None
|
|
63
77
|
status: Union[TaskStatus, None] = None
|
|
64
78
|
def _get_coroutine_name(self, coroutine: asyncio.coroutines) -> str:
|
|
@@ -66,6 +80,15 @@ class TaskEntry(IdentifiableEntity):
|
|
|
66
80
|
return coroutine.cr_code.co_name
|
|
67
81
|
return "<unknown>"
|
|
68
82
|
def __init__(self, **data):
|
|
83
|
+
def _metadata_extra(data: str) -> dict[str,str] | None:
|
|
84
|
+
if data:
|
|
85
|
+
_values = data.split(",")
|
|
86
|
+
if _values:
|
|
87
|
+
try:
|
|
88
|
+
return {k: v for k,v in [val.split("=") for val in _values]}
|
|
89
|
+
except Exception as e:
|
|
90
|
+
return None
|
|
91
|
+
return None
|
|
69
92
|
#separate task from data to handle asyncio.Task
|
|
70
93
|
task = data.pop('task',None)
|
|
71
94
|
super().__init__(**data)
|
|
@@ -75,22 +98,47 @@ class TaskEntry(IdentifiableEntity):
|
|
|
75
98
|
if not self.status:
|
|
76
99
|
self.status = TaskStatus(
|
|
77
100
|
id=self.id,
|
|
78
|
-
type=self.headers.x_ws_bom_msg_type if self.headers and self.headers.x_ws_bom_msg_type else self._get_coroutine_name(
|
|
101
|
+
type=self.headers.x_ws_bom_msg_type if self.headers and self.headers.x_ws_bom_msg_type else self._get_coroutine_name(self.coroutine) if self.coroutine else None,
|
|
79
102
|
status="pending",
|
|
80
103
|
metadata=TaskMetaData(
|
|
81
|
-
|
|
82
|
-
source=self._get_coroutine_name(
|
|
83
|
-
pid=os.getpid()
|
|
104
|
+
created_at=str(datetime.now().isoformat()),
|
|
105
|
+
source=self._get_coroutine_name(self.coroutine) if self.coroutine else None,
|
|
106
|
+
pid=os.getpid(),
|
|
107
|
+
extra=_metadata_extra(self.headers.x_ws_bom_msg_extra) if self.headers and self.headers.x_ws_bom_msg_extra else None
|
|
108
|
+
)
|
|
84
109
|
)
|
|
85
110
|
model_config = ConfigDict(
|
|
86
111
|
arbitrary_types_allowed=True,
|
|
87
112
|
validate_assignment=True
|
|
88
113
|
)
|
|
89
114
|
|
|
115
|
+
class TaskStatistics(BaseModel):
|
|
116
|
+
class TaskStatisticExecutionInfo(BaseModel):
|
|
117
|
+
retention_days: float = config.robot_task_retention_days
|
|
118
|
+
max_concurrent: int
|
|
119
|
+
running: list[TaskStatus]
|
|
120
|
+
slowest: list
|
|
121
|
+
class TaskStatisticExecutionTime(BaseModel):
|
|
122
|
+
min: str
|
|
123
|
+
max: str
|
|
124
|
+
avg: str
|
|
125
|
+
total: int
|
|
126
|
+
pending: int
|
|
127
|
+
completed: int
|
|
128
|
+
failure: int
|
|
129
|
+
exec_time: TaskStatisticExecutionTime
|
|
130
|
+
exec_info: TaskStatisticExecutionInfo
|
|
131
|
+
|
|
90
132
|
#endregion
|
|
91
133
|
|
|
92
134
|
#region interface
|
|
93
135
|
class TaskManagerStrategy(ABC):
|
|
136
|
+
def __init__(self, max_concurrent_tasks: int = floor(2 * config.robot_task_max_concurrent / config.runtime_options().number_of_workers)):
|
|
137
|
+
self.max_concurrent_tasks = max_concurrent_tasks
|
|
138
|
+
self.semaphore = asyncio.Semaphore(self.max_concurrent_tasks)
|
|
139
|
+
self.running_tasks = dict[str, TaskEntry]()
|
|
140
|
+
self.loop = asyncio.get_event_loop()
|
|
141
|
+
|
|
94
142
|
@abstractmethod
|
|
95
143
|
def create_task(self, coroutine, headers: TaskHeader | None = None) -> IdentifiableEntity:
|
|
96
144
|
pass
|
|
@@ -105,7 +153,7 @@ class TaskManagerStrategy(ABC):
|
|
|
105
153
|
pass
|
|
106
154
|
|
|
107
155
|
@abstractmethod
|
|
108
|
-
def get_tasks(self) -> list[TaskStatus]
|
|
156
|
+
def get_tasks(self) -> list[TaskStatus]:
|
|
109
157
|
pass
|
|
110
158
|
|
|
111
159
|
@abstractmethod
|
|
@@ -121,7 +169,7 @@ class TaskManagerStrategy(ABC):
|
|
|
121
169
|
pass
|
|
122
170
|
|
|
123
171
|
def task_cleanup_rule(self, task: TaskEntry) -> bool:
|
|
124
|
-
return task.status.status in {"completed", "failure"} and datetime.fromisoformat(task.status.metadata.
|
|
172
|
+
return task.status.status in {"completed", "failure"} and datetime.fromisoformat(task.status.metadata.end_at) < datetime.now() - timedelta(days=config.robot_task_retention_days)
|
|
125
173
|
|
|
126
174
|
def task_done_callback(self, task_entry: TaskEntry, headers: TaskHeader | None = None) -> callable:
|
|
127
175
|
def callback(task: asyncio.Task):
|
|
@@ -133,7 +181,7 @@ class TaskManagerStrategy(ABC):
|
|
|
133
181
|
task_entry.status.status = "failure"
|
|
134
182
|
task_entry.status.error = str(e)
|
|
135
183
|
finally:
|
|
136
|
-
task_entry.status.metadata.
|
|
184
|
+
task_entry.status.metadata.end_at = str(datetime.now().isoformat())
|
|
137
185
|
#strategy-specific behavior
|
|
138
186
|
self.update_task_status(task_entry)
|
|
139
187
|
#notify webhooks
|
|
@@ -142,14 +190,32 @@ class TaskManagerStrategy(ABC):
|
|
|
142
190
|
WebhookNotifier().notify_webhook(task_entry.status,headers.x_ws_bom_webhooks)
|
|
143
191
|
)
|
|
144
192
|
return callback
|
|
193
|
+
|
|
145
194
|
def create_task_entry(self, coroutine: asyncio.coroutines, headers: TaskHeader | None = None) -> TaskEntry:
|
|
195
|
+
_id = headers and headers.x_ws_bom_msg_id or str(uuid4())
|
|
146
196
|
task = TaskEntry(
|
|
147
|
-
id=
|
|
148
|
-
|
|
197
|
+
id=_id,
|
|
198
|
+
coroutine=coroutine,
|
|
149
199
|
headers=headers)
|
|
150
|
-
|
|
200
|
+
self.loop.create_task(self._run_task_with_semaphore(task)) # run the task
|
|
151
201
|
return task
|
|
152
202
|
|
|
203
|
+
async def _run_task_with_semaphore(self, task_entry: TaskEntry):
|
|
204
|
+
"""Run a task with semaphore control to limit concurrency."""
|
|
205
|
+
async with self.semaphore:
|
|
206
|
+
self.running_tasks[task_entry.id]=task_entry
|
|
207
|
+
await self._execute_task(task_entry)
|
|
208
|
+
del self.running_tasks[task_entry.id]
|
|
209
|
+
|
|
210
|
+
async def _execute_task(self, task_entry: TaskEntry):
|
|
211
|
+
"""Execute a task and handle its lifecycle."""
|
|
212
|
+
task_entry.status.metadata.start_at = str(datetime.now().isoformat())
|
|
213
|
+
task_entry.task = asyncio.create_task(task_entry.coroutine)
|
|
214
|
+
task_entry.task.add_done_callback(self.task_done_callback(task_entry, task_entry.headers))
|
|
215
|
+
await task_entry.task
|
|
216
|
+
|
|
217
|
+
def running_task(self):
|
|
218
|
+
return self.running_tasks.values()
|
|
153
219
|
def stats(self) -> TaskStatistics:
|
|
154
220
|
def __string_to_timedelta(value: str) -> timedelta:
|
|
155
221
|
if "." in value:
|
|
@@ -163,12 +229,13 @@ class TaskManagerStrategy(ABC):
|
|
|
163
229
|
minutes, seconds = divmod(remainder, 60)
|
|
164
230
|
return f"{int(hours):02d}:{int(minutes):02d}:{int(seconds):02d}.{td.microseconds}"
|
|
165
231
|
_all = self.get_tasks()
|
|
166
|
-
_not_pending = _all and [task for task in _all if task.status != "pending"]
|
|
167
|
-
_total_not_pending = len(_not_pending)
|
|
168
|
-
elapsed_times = [__string_to_timedelta(task.metadata.elapsed_time) for task in _not_pending]
|
|
169
|
-
_avg_exec_time = sum(elapsed_times, timedelta()) / _total_not_pending if _total_not_pending > 0 else timedelta()
|
|
170
|
-
_min_exec_time = min(elapsed_times) if _total_not_pending > 0 else timedelta()
|
|
171
|
-
_max_exec_time = max(elapsed_times) if _total_not_pending > 0 else timedelta()
|
|
232
|
+
_not_pending = _all and [task for task in _all if task.status != "pending"] or []
|
|
233
|
+
_total_not_pending = _not_pending and len(_not_pending) if _not_pending else 0
|
|
234
|
+
elapsed_times = _not_pending and [__string_to_timedelta(task.metadata.elapsed_time) for task in _not_pending]
|
|
235
|
+
_avg_exec_time = sum(elapsed_times, timedelta()) / _total_not_pending if elapsed_times and _total_not_pending > 0 else timedelta()
|
|
236
|
+
_min_exec_time = min(elapsed_times) if elapsed_times and _total_not_pending > 0 else timedelta()
|
|
237
|
+
_max_exec_time = max(elapsed_times) if elapsed_times and _total_not_pending > 0 else timedelta()
|
|
238
|
+
_slowest: list[TaskStatus] = _not_pending and sorted(_not_pending, key=lambda x: __string_to_timedelta(x.metadata.elapsed_time), reverse=True)[:3]
|
|
172
239
|
return TaskStatistics(
|
|
173
240
|
total= _all and len(_all) or 0,
|
|
174
241
|
pending=_all and len([task for task in _all if task.status == "pending"]) or 0,
|
|
@@ -178,6 +245,12 @@ class TaskManagerStrategy(ABC):
|
|
|
178
245
|
min=__timedelta_to_string(_min_exec_time),
|
|
179
246
|
max=__timedelta_to_string(_max_exec_time),
|
|
180
247
|
avg=__timedelta_to_string(_avg_exec_time)
|
|
248
|
+
),
|
|
249
|
+
exec_info=TaskStatistics.TaskStatisticExecutionInfo(
|
|
250
|
+
retention_days=config.robot_task_retention_days,
|
|
251
|
+
max_concurrent=self.max_concurrent_tasks,
|
|
252
|
+
running=[task.status for task in self.running_task()],
|
|
253
|
+
slowest=_slowest
|
|
181
254
|
)
|
|
182
255
|
)
|
|
183
256
|
|
|
@@ -186,6 +259,7 @@ class TaskManagerStrategy(ABC):
|
|
|
186
259
|
#memory implementation
|
|
187
260
|
class MemoryTaskManagerStrategy(TaskManagerStrategy):
|
|
188
261
|
def __init__(self):
|
|
262
|
+
super().__init__()
|
|
189
263
|
self.tasks: Dict[str, TaskEntry] = {}
|
|
190
264
|
|
|
191
265
|
def create_task(self, coroutine: asyncio.coroutines, headers: TaskHeader | None = None) -> IdentifiableEntity:
|
|
@@ -199,7 +273,7 @@ class MemoryTaskManagerStrategy(TaskManagerStrategy):
|
|
|
199
273
|
|
|
200
274
|
def get_task(self, id: str) -> TaskStatus | None:
|
|
201
275
|
if _task := self.tasks.get(id):
|
|
202
|
-
return _task
|
|
276
|
+
return _task.status
|
|
203
277
|
return None
|
|
204
278
|
|
|
205
279
|
def get_tasks(self) -> list[TaskStatus] | None:
|
|
@@ -217,7 +291,7 @@ class MemoryTaskManagerStrategy(TaskManagerStrategy):
|
|
|
217
291
|
#endregion
|
|
218
292
|
|
|
219
293
|
#db implementation
|
|
220
|
-
Base =
|
|
294
|
+
Base = registry().generate_base()
|
|
221
295
|
class TaskEntryModel(Base):
|
|
222
296
|
__tablename__ = "entry"
|
|
223
297
|
id = Column(String, primary_key=True)
|
|
@@ -227,6 +301,7 @@ class TaskEntryModel(Base):
|
|
|
227
301
|
)
|
|
228
302
|
class DatabaseTaskManagerStrategy(TaskManagerStrategy):
|
|
229
303
|
def __init__(self, db_url: str = "sqlite:///.data/db/tasks.sqlite"):
|
|
304
|
+
super().__init__()
|
|
230
305
|
self.engine = create_engine(db_url)
|
|
231
306
|
self.Session = sessionmaker(bind=self.engine)
|
|
232
307
|
Base.metadata.create_all(self.engine)
|
|
@@ -249,15 +324,15 @@ class DatabaseTaskManagerStrategy(TaskManagerStrategy):
|
|
|
249
324
|
with self.Session() as session:
|
|
250
325
|
task = session.query(TaskEntryModel).filter_by(id=id).first()
|
|
251
326
|
if task:
|
|
252
|
-
return TaskEntry(**task.__dict__)
|
|
327
|
+
return TaskEntry(**task.__dict__).status
|
|
253
328
|
return None
|
|
254
329
|
|
|
255
|
-
def get_tasks(self) -> list[TaskStatus]
|
|
330
|
+
def get_tasks(self) -> list[TaskStatus]:
|
|
256
331
|
with self.Session() as session:
|
|
257
332
|
tasks = session.query(TaskEntryModel).all()
|
|
258
333
|
if tasks:
|
|
259
334
|
return [TaskEntry(**task.__dict__).status for task in tasks]
|
|
260
|
-
return
|
|
335
|
+
return []
|
|
261
336
|
|
|
262
337
|
def remove_task(self, id: str) -> None:
|
|
263
338
|
with self.Session() as session:
|
|
@@ -286,10 +361,10 @@ router = APIRouter(prefix="/api/task", tags=["task"])
|
|
|
286
361
|
|
|
287
362
|
@router.get("/status/{id}")
|
|
288
363
|
async def _status_task(id: str) -> TaskStatus:
|
|
289
|
-
|
|
290
|
-
if not
|
|
364
|
+
task_status = task_manager.get_task(id)
|
|
365
|
+
if not task_status:
|
|
291
366
|
raise HTTPException(status_code=404, detail="Task not found")
|
|
292
|
-
return
|
|
367
|
+
return task_status
|
|
293
368
|
|
|
294
369
|
@router.get("/status")
|
|
295
370
|
async def _status_task_list() -> list[TaskStatus]:
|
|
@@ -308,4 +383,5 @@ async def _remove_task_list():
|
|
|
308
383
|
@router.get("/stats")
|
|
309
384
|
async def _stats() -> TaskStatistics:
|
|
310
385
|
return task_manager.stats()
|
|
386
|
+
|
|
311
387
|
#endregion
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/agent_description.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/tools/models/__init__.py
RENAMED
|
File without changes
|
{ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/tools/models/main.py
RENAMED
|
File without changes
|
{ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/tools/tool_builder.py
RENAMED
|
File without changes
|
{ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/tools/tool_manager.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/utils/agent_utils.py
RENAMED
|
File without changes
|
|
File without changes
|
{ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/utils/faiss_helper.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/vector_store/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ws_bom_robot_app-0.0.11 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|