ws-bom-robot-app 0.0.10__py3-none-any.whl → 0.0.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ws_bom_robot_app/config.py +30 -1
- ws_bom_robot_app/cron_manager.py +226 -74
- ws_bom_robot_app/llm/models/api.py +2 -2
- ws_bom_robot_app/llm/models/kb.py +1 -1
- ws_bom_robot_app/llm/utils/webhooks.py +1 -0
- ws_bom_robot_app/llm/vector_store/generator.py +1 -1
- ws_bom_robot_app/llm/vector_store/integration/sitemap.py +23 -20
- ws_bom_robot_app/llm/vector_store/loader/base.py +2 -1
- ws_bom_robot_app/llm/vector_store/loader/json_loader.py +3 -4
- ws_bom_robot_app/main.py +13 -2
- ws_bom_robot_app/task_manager.py +306 -70
- {ws_bom_robot_app-0.0.10.dist-info → ws_bom_robot_app-0.0.12.dist-info}/METADATA +2 -2
- {ws_bom_robot_app-0.0.10.dist-info → ws_bom_robot_app-0.0.12.dist-info}/RECORD +15 -15
- {ws_bom_robot_app-0.0.10.dist-info → ws_bom_robot_app-0.0.12.dist-info}/WHEEL +0 -0
- {ws_bom_robot_app-0.0.10.dist-info → ws_bom_robot_app-0.0.12.dist-info}/top_level.txt +0 -0
ws_bom_robot_app/config.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
|
+
from typing import Optional
|
|
1
2
|
from pydantic import BaseModel, ConfigDict
|
|
2
3
|
from pydantic_settings import BaseSettings
|
|
3
|
-
|
|
4
|
+
import os
|
|
4
5
|
|
|
5
6
|
class Settings(BaseSettings):
|
|
6
7
|
robot_env: str = 'local'
|
|
@@ -12,6 +13,7 @@ class Settings(BaseSettings):
|
|
|
12
13
|
robot_data_db_folder_out: str = 'out'
|
|
13
14
|
robot_data_db_folder_store: str = 'store'
|
|
14
15
|
robot_data_db_retention_days: float = 60
|
|
16
|
+
robot_task_max_concurrent: int = os.cpu_count() or 1
|
|
15
17
|
robot_task_retention_days: float = 1
|
|
16
18
|
robot_cms_host: str = ''
|
|
17
19
|
robot_cms_auth: str = ''
|
|
@@ -25,10 +27,37 @@ class Settings(BaseSettings):
|
|
|
25
27
|
)
|
|
26
28
|
|
|
27
29
|
class RuntimeOptions(BaseModel):
|
|
30
|
+
@staticmethod
|
|
31
|
+
def _get_number_of_workers() -> int:
|
|
32
|
+
"""
|
|
33
|
+
Returns the number of worker processes to use for the application.
|
|
34
|
+
|
|
35
|
+
This function inspects the command-line arguments to determine the number
|
|
36
|
+
of worker processes to use. It looks for the "--workers" argument and
|
|
37
|
+
returns the subsequent value as an integer.
|
|
38
|
+
Sample of command-line arguments:
|
|
39
|
+
fastapi dev main.py --port 6001
|
|
40
|
+
fastapi run main.py --port 6001 --workers 4
|
|
41
|
+
uvicorn main:app --port 6001 --workers 4
|
|
42
|
+
|
|
43
|
+
Returns:
|
|
44
|
+
Optional[int]: The number of worker processes to use, or 1 if
|
|
45
|
+
the argument is not found or the value is invalid.
|
|
46
|
+
"""
|
|
47
|
+
import sys
|
|
48
|
+
try:
|
|
49
|
+
for i, arg in enumerate(sys.argv):
|
|
50
|
+
if arg == "--workers" and i + 1 < len(sys.argv):
|
|
51
|
+
return int(sys.argv[i + 1])
|
|
52
|
+
except (ValueError, IndexError):
|
|
53
|
+
pass
|
|
54
|
+
return 1
|
|
28
55
|
debug: bool
|
|
29
56
|
loader_strategy: str
|
|
30
57
|
loader_show_progress: bool
|
|
31
58
|
loader_silent_errors: bool
|
|
59
|
+
number_of_workers: int = _get_number_of_workers()
|
|
60
|
+
is_multi_process: bool = _get_number_of_workers() > 1
|
|
32
61
|
|
|
33
62
|
|
|
34
63
|
def runtime_options(self) -> RuntimeOptions:
|
ws_bom_robot_app/cron_manager.py
CHANGED
|
@@ -1,99 +1,251 @@
|
|
|
1
|
-
from
|
|
2
|
-
|
|
3
|
-
import
|
|
1
|
+
from apscheduler.schedulers.background import BackgroundScheduler
|
|
2
|
+
#from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
|
3
|
+
from apscheduler.jobstores.memory import MemoryJobStore
|
|
4
|
+
from apscheduler.jobstores.sqlalchemy import SQLAlchemyJobStore
|
|
5
|
+
from apscheduler.triggers.cron import CronTrigger
|
|
6
|
+
from apscheduler.triggers.interval import IntervalTrigger
|
|
7
|
+
from apscheduler.triggers.date import DateTrigger
|
|
4
8
|
from fastapi import APIRouter
|
|
9
|
+
from datetime import datetime
|
|
5
10
|
from ws_bom_robot_app.task_manager import task_manager
|
|
6
11
|
from ws_bom_robot_app.llm.utils.kb import kb_cleanup_data_file
|
|
7
12
|
from ws_bom_robot_app.util import _log
|
|
8
|
-
import
|
|
9
|
-
|
|
10
|
-
class
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
13
|
+
from ws_bom_robot_app.config import config
|
|
14
|
+
|
|
15
|
+
class JobstoreStrategy:
|
|
16
|
+
def get_jobstore(self):
|
|
17
|
+
raise NotImplementedError("Subclasses should implement this method")
|
|
18
|
+
|
|
19
|
+
class MemoryJobstoreStrategy(JobstoreStrategy):
|
|
20
|
+
def get_jobstore(self):
|
|
21
|
+
_log.info("Using in-memory cron jobstore.")
|
|
22
|
+
return {"default": MemoryJobStore()}
|
|
23
|
+
|
|
24
|
+
class PersistentJobstoreStrategy(JobstoreStrategy):
|
|
25
|
+
def get_jobstore(self, db_url: str = "sqlite:///.data/db/jobs.sqlite"):
|
|
26
|
+
_log.info(f"Using persistent crob jobstore with database URL: {db_url}.")
|
|
27
|
+
return {"default": SQLAlchemyJobStore(url=db_url)}
|
|
28
|
+
|
|
29
|
+
class Job:
|
|
30
|
+
def __init__(self, name: str, job_func, args: list = None, kwargs: dict = None, cron_expression: str = None, interval: int = None, run_at: datetime = None):
|
|
31
|
+
"""
|
|
32
|
+
Job class that supports both recurring and one-time jobs.
|
|
33
|
+
:param job_func: The function to execute.
|
|
34
|
+
:param interval: Interval in seconds for recurring jobs.
|
|
35
|
+
:param run_at: Specific datetime for one-time jobs.
|
|
36
|
+
:param tags: Tags associated with the job.
|
|
37
|
+
"""
|
|
38
|
+
if not (cron_expression or interval or run_at):
|
|
39
|
+
raise ValueError("Either 'interval' or 'run_at' must be provided.")
|
|
40
|
+
self.name = name
|
|
21
41
|
self.job_func = job_func
|
|
22
|
-
self.
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
42
|
+
self.args: list = args or []
|
|
43
|
+
self.kwargs: dict = kwargs or {}
|
|
44
|
+
self.cron_expression = cron_expression
|
|
45
|
+
self.interval = interval
|
|
46
|
+
self.run_at = run_at
|
|
47
|
+
|
|
48
|
+
def create_trigger(self):
|
|
49
|
+
"""Create the appropriate trigger based on the job type."""
|
|
50
|
+
if self.cron_expression:
|
|
51
|
+
return CronTrigger.from_crontab(self.cron_expression)
|
|
52
|
+
if self.interval:
|
|
53
|
+
return IntervalTrigger(seconds=self.interval)
|
|
54
|
+
elif self.run_at:
|
|
55
|
+
return DateTrigger(run_date=self.run_at)
|
|
30
56
|
|
|
31
57
|
class CronManager:
|
|
58
|
+
_list_default = [
|
|
59
|
+
Job('cleanup-task',task_manager.cleanup_task, interval=5 * 60),
|
|
60
|
+
Job('cleanup-data',kb_cleanup_data_file, interval=180 * 60),
|
|
61
|
+
]
|
|
62
|
+
def __get_jobstore_strategy() -> JobstoreStrategy:
|
|
63
|
+
if True or config.runtime_options().is_multi_process:
|
|
64
|
+
return MemoryJobstoreStrategy()
|
|
65
|
+
return PersistentJobstoreStrategy()
|
|
66
|
+
def __init__(self, strategy: JobstoreStrategy = None, enable_defaults: bool = True):
|
|
67
|
+
self.enable_defaults = enable_defaults
|
|
68
|
+
if strategy is None:
|
|
69
|
+
strategy = CronManager.__get_jobstore_strategy()
|
|
70
|
+
jobstores = strategy.get_jobstore()
|
|
71
|
+
self.scheduler: BackgroundScheduler = BackgroundScheduler(jobstores=jobstores)
|
|
72
|
+
self.__scheduler_is_running = False
|
|
32
73
|
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
74
|
+
def add_job(self, job: Job):
|
|
75
|
+
"""
|
|
76
|
+
Adds a job to the scheduler with the specified name and job details.
|
|
77
|
+
Args:
|
|
78
|
+
name (str): The unique identifier for the job.
|
|
79
|
+
job (Job): An instance of the Job class containing the job details.
|
|
80
|
+
The job details include:
|
|
81
|
+
- job_func: The function to be executed.
|
|
82
|
+
- args: The positional arguments to pass to the job function.
|
|
83
|
+
- kwargs: The keyword arguments to pass to the job function.
|
|
84
|
+
- trigger: The trigger that determines when the job should be executed.
|
|
85
|
+
The job will replace any existing job with the same name.
|
|
86
|
+
Sample usage:
|
|
87
|
+
recurring_job = Job(name="sample-recurring-job",job_func=example_job, interval=5, tags=tags, args=args, kwargs=kwargs)
|
|
88
|
+
cron_manager.add_job(recurring_job)
|
|
89
|
+
fire_once_job = Job(name="sample-fire-once-job",job_func=example_job, run_at=datetime.now(), tags=tags, args=args, kwargs=kwargs)
|
|
90
|
+
cron_manager.add_job(fire_once_job)
|
|
91
|
+
"""
|
|
92
|
+
existing_job = self.scheduler.get_job(job.name)
|
|
93
|
+
if existing_job:
|
|
94
|
+
_log.info(f"Job with name '{job.name}' already exists. Skip creation.")
|
|
95
|
+
else:
|
|
96
|
+
trigger = job.create_trigger()
|
|
97
|
+
self.scheduler.add_job(
|
|
98
|
+
func=job.job_func,
|
|
99
|
+
args=job.args,
|
|
100
|
+
kwargs=job.kwargs,
|
|
101
|
+
trigger=trigger,
|
|
102
|
+
id=job.name,
|
|
103
|
+
name=job.name,
|
|
104
|
+
replace_existing=True
|
|
105
|
+
)
|
|
37
106
|
|
|
38
|
-
def __init__(self):
|
|
39
|
-
self.jobs: dict[str, RecurringJob] = CronManager._list
|
|
40
|
-
self.__scheduler_is_running = False
|
|
41
|
-
def add_job(self, name:str, job: RecurringJob):
|
|
42
|
-
job = {name: job}
|
|
43
|
-
self.jobs.append(job)
|
|
44
|
-
return job
|
|
45
|
-
def run_pending(self):
|
|
46
|
-
return schedule.run_pending()
|
|
47
|
-
def run_all(self):
|
|
48
|
-
return schedule.run_all()
|
|
49
|
-
def clear(self):
|
|
50
|
-
self.__scheduler_is_running = False
|
|
51
|
-
return schedule.clear()
|
|
52
|
-
def get_jobs(self):
|
|
53
|
-
return schedule.get_jobs()
|
|
54
107
|
def start(self):
|
|
55
|
-
def _target():
|
|
56
|
-
while self.__scheduler_is_running:
|
|
57
|
-
time.sleep(1)
|
|
58
|
-
self.run_pending()
|
|
59
|
-
time.sleep(59)
|
|
60
|
-
_log.info(f"__scheduler_is_running={self.__scheduler_is_running}")
|
|
61
|
-
#clear all jobs
|
|
62
|
-
self.clear()
|
|
63
|
-
#prepare jobs
|
|
64
|
-
for job in self.jobs.values():
|
|
65
|
-
job.run()
|
|
66
|
-
#start scheduler
|
|
67
108
|
if not self.__scheduler_is_running:
|
|
68
109
|
self.__scheduler_is_running = True
|
|
69
|
-
|
|
70
|
-
|
|
110
|
+
self.scheduler.start()
|
|
111
|
+
if self.enable_defaults and CronManager._list_default:
|
|
112
|
+
for job in CronManager._list_default:
|
|
113
|
+
existing_job = self.scheduler.get_job(job.name)
|
|
114
|
+
if existing_job is None:
|
|
115
|
+
self.add_job(job)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def get_job(self, job_id: str):
|
|
119
|
+
return self.scheduler.get_job(job_id)
|
|
120
|
+
|
|
121
|
+
def get_jobs(self):
|
|
122
|
+
return self.scheduler.get_jobs()
|
|
123
|
+
|
|
124
|
+
def execute_job(self, job_id: str):
|
|
125
|
+
job = self.scheduler.get_job(job_id)
|
|
126
|
+
if job:
|
|
127
|
+
job.func()
|
|
128
|
+
else:
|
|
129
|
+
raise ValueError(f"Job with id '{job_id}' not found.")
|
|
130
|
+
|
|
131
|
+
def pause_job(self, job_id: str):
|
|
132
|
+
self.scheduler.pause_job(job_id)
|
|
133
|
+
|
|
134
|
+
def resume_job(self, job_id: str):
|
|
135
|
+
self.scheduler.resume_job(job_id)
|
|
136
|
+
|
|
137
|
+
def remove_job(self, job_id: str):
|
|
138
|
+
self.scheduler.remove_job(job_id)
|
|
139
|
+
|
|
140
|
+
def execute_recurring_jobs(self):
|
|
141
|
+
for job in self.scheduler.get_jobs():
|
|
142
|
+
if job.interval:
|
|
143
|
+
job.job_func()
|
|
144
|
+
|
|
145
|
+
def pause_recurring_jobs(self):
|
|
146
|
+
for job in self.scheduler.get_jobs():
|
|
147
|
+
if job.interval:
|
|
148
|
+
self.pause_job(job.id)
|
|
149
|
+
|
|
150
|
+
def resume_recurring_jobs(self):
|
|
151
|
+
for job in self.scheduler.get_jobs():
|
|
152
|
+
if job.interval:
|
|
153
|
+
self.resume_job(job.id)
|
|
154
|
+
|
|
155
|
+
def remove_recurring_jobs(self):
|
|
156
|
+
for job in self.scheduler.get_jobs():
|
|
157
|
+
if job.interval:
|
|
158
|
+
self.remove_job(job.id)
|
|
159
|
+
|
|
160
|
+
def clear(self):
|
|
161
|
+
self.__scheduler_is_running = False
|
|
162
|
+
self.scheduler.remove_all_jobs()
|
|
163
|
+
|
|
164
|
+
def shutdown(self):
|
|
165
|
+
self.scheduler.shutdown()
|
|
71
166
|
|
|
72
167
|
cron_manager = CronManager()
|
|
73
168
|
|
|
169
|
+
# FastAPI Routes
|
|
74
170
|
router = APIRouter(prefix="/api/cron", tags=["cron"])
|
|
171
|
+
|
|
75
172
|
@router.get("/list")
|
|
76
173
|
def _list():
|
|
77
|
-
def __format(job
|
|
174
|
+
def __format(job):
|
|
175
|
+
return {
|
|
176
|
+
"id": job.id,
|
|
177
|
+
"name": job.name,
|
|
178
|
+
"func": job.func_ref,
|
|
179
|
+
"pending": job.pending,
|
|
180
|
+
"trigger": str(job.trigger),
|
|
181
|
+
"next_run_time": job.next_run_time
|
|
182
|
+
}
|
|
183
|
+
return [__format(job) for job in cron_manager.get_jobs()]
|
|
184
|
+
|
|
185
|
+
@router.get("/default-jobs")
|
|
186
|
+
def _default_jobs():
|
|
187
|
+
def __format(job):
|
|
188
|
+
existing_job = cron_manager.scheduler.get_job(job.name)
|
|
78
189
|
return {
|
|
79
|
-
"
|
|
80
|
-
"
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
190
|
+
"name": job.name,
|
|
191
|
+
"status": "exists" if existing_job else "not added"
|
|
192
|
+
}
|
|
193
|
+
return [__format(job) for job in CronManager._list_default]
|
|
194
|
+
|
|
195
|
+
@router.post("/execute-job/{job_id}")
|
|
196
|
+
def _execute_job(job_id: str):
|
|
197
|
+
try:
|
|
198
|
+
cron_manager.execute_job(job_id)
|
|
199
|
+
return {"status": f"Job {job_id} executed"}
|
|
200
|
+
except ValueError as e:
|
|
201
|
+
return {"error": str(e)}
|
|
202
|
+
|
|
203
|
+
@router.post("/pause-job/{job_id}")
|
|
204
|
+
def _pause_job(job_id: str):
|
|
205
|
+
cron_manager.pause_job(job_id)
|
|
206
|
+
return {"status": f"Job {job_id} paused"}
|
|
207
|
+
|
|
208
|
+
@router.post("/resume-job/{job_id}")
|
|
209
|
+
def _resume_job(job_id: str):
|
|
210
|
+
cron_manager.resume_job(job_id)
|
|
211
|
+
return {"status": f"Job {job_id} resumed"}
|
|
212
|
+
|
|
213
|
+
@router.delete("/remove-job/{job_id}")
|
|
214
|
+
def _remove_job(job_id: str):
|
|
215
|
+
cron_manager.remove_job(job_id)
|
|
216
|
+
return {"status": f"Job {job_id} removed"}
|
|
217
|
+
|
|
218
|
+
@router.post("/execute-recurring")
|
|
219
|
+
def _execute_recurring():
|
|
220
|
+
cron_manager.execute_recurring_jobs()
|
|
221
|
+
return {"status": "All recurring jobs executed"}
|
|
222
|
+
|
|
223
|
+
@router.post("/pause-recurring")
|
|
224
|
+
def _pause_recurring():
|
|
225
|
+
cron_manager.pause_recurring_jobs()
|
|
226
|
+
return {"status": "All recurring jobs paused"}
|
|
227
|
+
|
|
228
|
+
@router.post("/resume-recurring")
|
|
229
|
+
def _resume_recurring():
|
|
230
|
+
cron_manager.resume_recurring_jobs()
|
|
231
|
+
return {"status": "All recurring jobs resumed"}
|
|
232
|
+
|
|
233
|
+
@router.delete("/remove-recurring")
|
|
234
|
+
def _remove_recurring():
|
|
235
|
+
cron_manager.remove_recurring_jobs()
|
|
236
|
+
return {"status": "All recurring jobs removed"}
|
|
87
237
|
|
|
88
238
|
@router.get("/start")
|
|
89
239
|
def _start():
|
|
90
240
|
cron_manager.start()
|
|
241
|
+
return {"status": "started"}
|
|
242
|
+
|
|
91
243
|
@router.delete("/stop")
|
|
92
244
|
def _stop():
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
return {"
|
|
245
|
+
cron_manager.clear()
|
|
246
|
+
return {"status": "stopped"}
|
|
247
|
+
|
|
248
|
+
@router.get("/shutdown")
|
|
249
|
+
def _shutdown():
|
|
250
|
+
cron_manager.shutdown()
|
|
251
|
+
return {"status": "shutdown"}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import List, Dict, Optional
|
|
1
|
+
from typing import List, Dict, Optional, Union
|
|
2
2
|
from datetime import datetime
|
|
3
3
|
from pydantic import AliasChoices, BaseModel, Field, ConfigDict
|
|
4
4
|
from ws_bom_robot_app.llm.models.kb import LlmKbEndpoint, LlmKbIntegration
|
|
@@ -125,7 +125,7 @@ class VectorDbRequest(BaseModel):
|
|
|
125
125
|
def api_key(self):
|
|
126
126
|
return self.secrets.get("openAIApiKey", "")
|
|
127
127
|
def out_name(self):
|
|
128
|
-
return "
|
|
128
|
+
return f"db_{datetime.now().strftime("%Y-%m-%d_%H-%M-%S-%f")[:-3]}_{os.getpid()}"
|
|
129
129
|
|
|
130
130
|
class RulesRequest(VectorDbRequest):
|
|
131
131
|
type: Optional[str] = 'rules'
|
|
@@ -144,7 +144,7 @@ async def load_endpoints(endpoints: list[LlmKbEndpoint], destination_directory:
|
|
|
144
144
|
documents = await JsonLoader(
|
|
145
145
|
file_path,
|
|
146
146
|
meta_fields=[field.name for field in endpoint.fields_mapping.meta_fields] if endpoint.fields_mapping.meta_fields else []
|
|
147
|
-
).
|
|
147
|
+
).aload()
|
|
148
148
|
_documents.extend(documents)
|
|
149
149
|
await aiofiles.os.remove(file_path)
|
|
150
150
|
except Exception as e:
|
|
@@ -9,7 +9,7 @@ from ws_bom_robot_app.llm.vector_store.integration.manager import IntegrationMan
|
|
|
9
9
|
from ws_bom_robot_app.llm.utils.faiss_helper import FaissHelper
|
|
10
10
|
from ws_bom_robot_app.util import timer
|
|
11
11
|
|
|
12
|
-
|
|
12
|
+
#@timer
|
|
13
13
|
async def rules(rq: RulesRequest) -> VectorDbResponse:
|
|
14
14
|
api_key = rq.api_key()
|
|
15
15
|
_config = rq.config()
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
from typing import Any
|
|
1
2
|
import aiofiles
|
|
2
3
|
import aiofiles.os
|
|
3
4
|
from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy
|
|
@@ -5,23 +6,21 @@ from langchain_community.document_loaders.sitemap import SitemapLoader
|
|
|
5
6
|
from langchain_community.document_transformers import MarkdownifyTransformer as markdownify
|
|
6
7
|
from langchain_core.documents import Document
|
|
7
8
|
from bs4 import BeautifulSoup, Tag
|
|
8
|
-
import nest_asyncio, os
|
|
9
|
-
|
|
10
9
|
|
|
11
10
|
class Sitemap(IntegrationStrategy):
|
|
12
|
-
"""
|
|
11
|
+
"""Class to load a sitemap.xml file and extract text from the URLs.
|
|
13
12
|
Load a sitemap.xml file and extract text from the urls.
|
|
14
13
|
Args:
|
|
15
14
|
data (dict[str, str]):
|
|
16
15
|
data["sitemapUrl"] (str): absolute/relative url of the sitemap.xml
|
|
17
16
|
data["outputFormat"] (str): ["text", "html", "markdown"] default to "text"
|
|
18
|
-
data["filterUrls"] list: list of regex pattern to filter urls ["https://www.example.com/en/products", "^.*products.*$"]
|
|
19
|
-
data["includeOnlySelector"] : [".content", "#main-article", "article p"]
|
|
20
|
-
data["excludeTag"] (str): default to ["script", "noscript", "style", "head", "header","nav","footer", "iframe"]
|
|
21
|
-
data["excludeClass"] (str): ["class1", "class2"]
|
|
22
|
-
data["excludeId"] (str): ["id1", "id2"]
|
|
17
|
+
data["filterUrls"] list[str]: list of regex pattern to filter urls ["https://www.example.com/en/products", "^.*products.*$"]
|
|
18
|
+
data["includeOnlySelector"] : list[str] [".content", "#main-article", "article p"]
|
|
19
|
+
data["excludeTag"] (list[str]): default to ["script", "noscript", "style", "head", "header","nav","footer", "iframe"]
|
|
20
|
+
data["excludeClass"] (list[str]): ["class1", "class2"]
|
|
21
|
+
data["excludeId"] (list[str]): ["id1", "id2"]
|
|
23
22
|
"""
|
|
24
|
-
def __init__(self, knowledgebase_path: str, data: dict[str,
|
|
23
|
+
def __init__(self, knowledgebase_path: str, data: dict[str, Any]):
|
|
25
24
|
super().__init__(knowledgebase_path, data)
|
|
26
25
|
self.__sitemap_url = self.data.get("sitemapUrl")
|
|
27
26
|
self.__filter_urls: list[str] = self.data.get("filterUrls",[]) # type: ignore
|
|
@@ -30,12 +29,12 @@ class Sitemap(IntegrationStrategy):
|
|
|
30
29
|
self.__exclude_tag: list[str] = self.data.get("excludeTag",[]) # type: ignore
|
|
31
30
|
self.__exclude_class: list[str] = self.data.get("excludeClass",[]) # type: ignore
|
|
32
31
|
self.__exclude_id: list[str] = self.data.get("excludeId",[]) # type: ignore
|
|
33
|
-
def working_subdirectory(self) -> str:
|
|
32
|
+
def working_subdirectory(self) -> str:
|
|
34
33
|
return ""
|
|
35
34
|
def _extract(self, tag: Tag) -> str:
|
|
36
35
|
return tag.get_text() if self.__output_format == "text" else tag.prettify()
|
|
37
36
|
def _output(self, documents: list[Document]) -> list[Document]:
|
|
38
|
-
return list(markdownify().transform_documents(documents)) if
|
|
37
|
+
return list(markdownify().transform_documents(documents)) if self.__output_format == "markdown" else documents
|
|
39
38
|
def _parse(self,content: BeautifulSoup) -> str:
|
|
40
39
|
if self.__include_only_selectors:
|
|
41
40
|
extracted = []
|
|
@@ -55,21 +54,25 @@ class Sitemap(IntegrationStrategy):
|
|
|
55
54
|
for _ in content.select(element):
|
|
56
55
|
_.decompose()
|
|
57
56
|
return str(self._extract(content))
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
return not url.startswith("http")
|
|
61
|
-
def _remap_if_local(url: str) -> str:
|
|
62
|
-
return f"{self.knowledgebase_path}/{url}" if _is_local(url) else url
|
|
57
|
+
def _is_local(self, url: str) -> bool:
|
|
58
|
+
return not url.startswith("http")
|
|
63
59
|
|
|
60
|
+
def _remap_if_local(self, url: str) -> str:
|
|
61
|
+
return f"{self.knowledgebase_path}/{url}" if self._is_local(url) else url
|
|
62
|
+
|
|
63
|
+
async def load(self) -> list[Document]:
|
|
64
64
|
if (self.__sitemap_url):
|
|
65
65
|
_loader = SitemapLoader(
|
|
66
|
-
web_path=_remap_if_local(self.__sitemap_url),
|
|
66
|
+
web_path=self._remap_if_local(self.__sitemap_url),
|
|
67
67
|
filter_urls=self.__filter_urls,
|
|
68
68
|
parsing_function=self._parse,
|
|
69
|
-
is_local=_is_local(self.__sitemap_url)
|
|
69
|
+
is_local=self._is_local(self.__sitemap_url)
|
|
70
70
|
)
|
|
71
71
|
_docs = self._output([document async for document in _loader.alazy_load()])
|
|
72
|
-
if _is_local(self.__sitemap_url):
|
|
73
|
-
|
|
72
|
+
if self._is_local(self.__sitemap_url):
|
|
73
|
+
try:
|
|
74
|
+
await aiofiles.os.remove(_loader.web_path)
|
|
75
|
+
except FileNotFoundError:
|
|
76
|
+
pass
|
|
74
77
|
return _docs
|
|
75
78
|
return []
|
|
@@ -2,7 +2,6 @@ import json
|
|
|
2
2
|
from typing import Optional
|
|
3
3
|
from langchain_core.documents import Document
|
|
4
4
|
from langchain_community.document_loaders.base import BaseLoader
|
|
5
|
-
import aiofiles
|
|
6
5
|
|
|
7
6
|
class JsonLoader(BaseLoader):
|
|
8
7
|
def __init__(self, file_path: str, meta_fields:Optional[list[str]] = [],encoding: Optional[str] = "utf-8"):
|
|
@@ -10,9 +9,9 @@ class JsonLoader(BaseLoader):
|
|
|
10
9
|
self.meta_fields = meta_fields
|
|
11
10
|
self.encoding = encoding
|
|
12
11
|
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
data = json.
|
|
12
|
+
def load(self) -> list[Document]:
|
|
13
|
+
with open(self.file_path, "r", encoding=self.encoding) as file:
|
|
14
|
+
data = json.load(file)
|
|
16
15
|
_list = data if isinstance(data, list) else [data]
|
|
17
16
|
return [
|
|
18
17
|
Document(
|
ws_bom_robot_app/main.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import datetime
|
|
2
2
|
import platform
|
|
3
|
+
from fastapi.responses import FileResponse
|
|
3
4
|
import uvicorn, os, sys
|
|
4
5
|
from fastapi import FastAPI, Depends
|
|
5
6
|
from fastapi.openapi.docs import get_swagger_ui_html
|
|
@@ -22,7 +23,10 @@ app.include_router(cron,dependencies=[Depends(authenticate)])
|
|
|
22
23
|
|
|
23
24
|
@app.get("/")
|
|
24
25
|
async def root():
|
|
25
|
-
return
|
|
26
|
+
return health()
|
|
27
|
+
@app.get("/favicon.ico")
|
|
28
|
+
async def favicon():
|
|
29
|
+
return FileResponse("./favicon.ico")
|
|
26
30
|
|
|
27
31
|
@app.get("/docs", include_in_schema=False)
|
|
28
32
|
async def get_swagger_documentation(authenticate: bool = Depends(authenticate)):
|
|
@@ -31,7 +35,11 @@ async def get_swagger_documentation(authenticate: bool = Depends(authenticate)):
|
|
|
31
35
|
async def openapi(authenticate: bool = Depends(authenticate)):
|
|
32
36
|
return get_openapi(title=app.title, version=app.version, routes=app.routes)
|
|
33
37
|
|
|
34
|
-
@app.get("/
|
|
38
|
+
@app.get("/api/health",tags=["diag"])
|
|
39
|
+
def health():
|
|
40
|
+
return {"status": "ok"}
|
|
41
|
+
|
|
42
|
+
@app.get("/api/diag",tags=["diag"])
|
|
35
43
|
def diag(authenticate: bool = Depends(authenticate)):
|
|
36
44
|
import pkg_resources
|
|
37
45
|
from ws_bom_robot_app.llm.vector_store.loader.base import Loader as wsll
|
|
@@ -48,6 +56,7 @@ def diag(authenticate: bool = Depends(authenticate)):
|
|
|
48
56
|
"version": platform.version(),
|
|
49
57
|
"type": platform.machine(),
|
|
50
58
|
"processor": platform.processor(),
|
|
59
|
+
"cpu": os.cpu_count(),
|
|
51
60
|
"architecture": platform.architecture()
|
|
52
61
|
},
|
|
53
62
|
"sys": {
|
|
@@ -57,6 +66,7 @@ def diag(authenticate: bool = Depends(authenticate)):
|
|
|
57
66
|
"args": {k: arg for k, arg in enumerate(sys.argv)}
|
|
58
67
|
},
|
|
59
68
|
"os": {
|
|
69
|
+
"ppid": os.getppid(),
|
|
60
70
|
"pid": os.getpid(),
|
|
61
71
|
"cwd": os.getcwd(),
|
|
62
72
|
"ws_bom_robot_app": pkg_resources.get_distribution("ws_bom_robot_app").version,
|
|
@@ -64,6 +74,7 @@ def diag(authenticate: bool = Depends(authenticate)):
|
|
|
64
74
|
},
|
|
65
75
|
},
|
|
66
76
|
"config":config,
|
|
77
|
+
"runtime":config.runtime_options(),
|
|
67
78
|
"extension": {
|
|
68
79
|
"loader": ({item[0]: item[1].loader.__name__ if item[1] else None} for item in sorted(wsll._list.items(), key=lambda x: x[0]) if item[1]),
|
|
69
80
|
"integration":({item[0]: type(item[1]).__name__} for item in wsim._list.items()),
|
ws_bom_robot_app/task_manager.py
CHANGED
|
@@ -1,53 +1,78 @@
|
|
|
1
|
+
from collections import deque
|
|
1
2
|
import inspect
|
|
3
|
+
from math import floor
|
|
2
4
|
import asyncio, os
|
|
3
5
|
from datetime import datetime, timedelta
|
|
4
6
|
from enum import Enum
|
|
5
|
-
from typing import Annotated, TypeVar, Optional, Dict, Union, Any
|
|
7
|
+
from typing import Annotated, Coroutine, Literal, TypeVar, Optional, Dict, Union, Any
|
|
6
8
|
from pydantic import BaseModel, ConfigDict, Field, computed_field
|
|
7
9
|
from uuid import uuid4
|
|
8
10
|
from fastapi import APIRouter, HTTPException
|
|
9
11
|
from ws_bom_robot_app.config import config
|
|
10
12
|
from ws_bom_robot_app.llm.models.base import IdentifiableEntity
|
|
11
13
|
from ws_bom_robot_app.llm.utils.webhooks import WebhookNotifier
|
|
14
|
+
from ws_bom_robot_app.util import _log
|
|
15
|
+
from sqlalchemy import create_engine, Column, String, JSON, DateTime, Enum
|
|
16
|
+
from sqlalchemy.orm import sessionmaker, registry
|
|
17
|
+
from abc import ABC, abstractmethod
|
|
12
18
|
|
|
13
19
|
T = TypeVar('T')
|
|
14
20
|
|
|
21
|
+
#region models
|
|
15
22
|
class TaskHeader(BaseModel):
|
|
23
|
+
"""
|
|
24
|
+
TaskHeader model representing the header information for a task.
|
|
25
|
+
Example:
|
|
26
|
+
```bash
|
|
27
|
+
curl -X POST "http://localhost:6001/api/llm/kb/task"
|
|
28
|
+
-H "x-ws-bom-msg-id: 1234"
|
|
29
|
+
-H "x-ws-bom-msg-type: generate.knowledgebase"
|
|
30
|
+
-H "x-ws-bom-msg-extra: key1=value1,key2=value2"
|
|
31
|
+
-H "x-ws-bom-webhooks: http://localhost:8000/api/webhook"
|
|
32
|
+
-d "{\"api_key\":\"string\"}"
|
|
33
|
+
```
|
|
34
|
+
Attributes:
|
|
35
|
+
x_ws_bom_msg_id (Optional[str]): The message ID for the task. If not provided, a UUID will be generated.
|
|
36
|
+
x_ws_bom_msg_type (Optional[str]): The message type for the task, e.g. "send.email" or "generate.knowledgebase".
|
|
37
|
+
x_ws_bom_msg_extra (Optional[str]): Any extra information for the task, in comma separated key=value pairs. e.g. "key1=value1,key2=value2".
|
|
38
|
+
x_ws_bom_webhooks (Optional[str]): Webhooks associated with the task, called when the task is completed or failed.
|
|
39
|
+
"""
|
|
40
|
+
x_ws_bom_msg_id: Optional[str] = None
|
|
16
41
|
x_ws_bom_msg_type: Optional[str] = None
|
|
42
|
+
x_ws_bom_msg_extra: Optional[str] = None
|
|
17
43
|
x_ws_bom_webhooks: Optional[str] = None
|
|
18
44
|
model_config = ConfigDict(
|
|
19
45
|
extra='allow'
|
|
20
46
|
)
|
|
21
47
|
|
|
22
48
|
class TaskMetaData(BaseModel):
|
|
23
|
-
|
|
24
|
-
|
|
49
|
+
created_at: str
|
|
50
|
+
start_at: Optional[str] = None
|
|
51
|
+
end_at: Optional[str] = None
|
|
25
52
|
@computed_field
|
|
26
53
|
@property
|
|
27
|
-
def elapsed_time(self) -> Union[
|
|
28
|
-
return (
|
|
54
|
+
def elapsed_time(self) -> Union[str, None]:
|
|
55
|
+
return str(
|
|
56
|
+
(datetime.now() if not self.end_at else datetime.fromisoformat(self.end_at))
|
|
57
|
+
- datetime.fromisoformat(self.created_at if not self.start_at else self.start_at)
|
|
58
|
+
)
|
|
29
59
|
source: Optional[str] = None
|
|
30
60
|
pid: Optional[int] = None
|
|
31
|
-
|
|
32
|
-
json_encoders={
|
|
33
|
-
datetime: lambda v: v.isoformat(),
|
|
34
|
-
timedelta: lambda v: str(v)
|
|
35
|
-
}
|
|
36
|
-
)
|
|
61
|
+
extra: Optional[dict[str,str]] = None
|
|
37
62
|
|
|
38
63
|
class TaskStatus(IdentifiableEntity):
|
|
39
|
-
class TaskStatusEnum(str, Enum):
|
|
40
|
-
pending = "pending"
|
|
41
|
-
completed = "completed"
|
|
42
|
-
failure = "failure"
|
|
43
64
|
type: Optional[str] = None
|
|
44
|
-
status:
|
|
65
|
+
status: Literal["pending", "completed", "failure"]
|
|
45
66
|
result: Optional[T] = None
|
|
46
67
|
metadata: TaskMetaData = None
|
|
47
68
|
error: Optional[str] = None
|
|
69
|
+
model_config = ConfigDict(
|
|
70
|
+
arbitrary_types_allowed=True
|
|
71
|
+
)
|
|
48
72
|
|
|
49
73
|
class TaskEntry(IdentifiableEntity):
|
|
50
74
|
task: Annotated[asyncio.Task, Field(default=None, validate_default=False)] = None
|
|
75
|
+
coroutine: Coroutine = None
|
|
51
76
|
headers: TaskHeader | None = None
|
|
52
77
|
status: Union[TaskStatus, None] = None
|
|
53
78
|
def _get_coroutine_name(self, coroutine: asyncio.coroutines) -> str:
|
|
@@ -55,6 +80,15 @@ class TaskEntry(IdentifiableEntity):
|
|
|
55
80
|
return coroutine.cr_code.co_name
|
|
56
81
|
return "<unknown>"
|
|
57
82
|
def __init__(self, **data):
|
|
83
|
+
def _metadata_extra(data: str) -> dict[str,str] | None:
|
|
84
|
+
if data:
|
|
85
|
+
_values = data.split(",")
|
|
86
|
+
if _values:
|
|
87
|
+
try:
|
|
88
|
+
return {k: v for k,v in [val.split("=") for val in _values]}
|
|
89
|
+
except Exception as e:
|
|
90
|
+
return None
|
|
91
|
+
return None
|
|
58
92
|
#separate task from data to handle asyncio.Task
|
|
59
93
|
task = data.pop('task',None)
|
|
60
94
|
super().__init__(**data)
|
|
@@ -64,88 +98,290 @@ class TaskEntry(IdentifiableEntity):
|
|
|
64
98
|
if not self.status:
|
|
65
99
|
self.status = TaskStatus(
|
|
66
100
|
id=self.id,
|
|
67
|
-
type=self.headers.x_ws_bom_msg_type if self.headers and self.headers.x_ws_bom_msg_type else self._get_coroutine_name(
|
|
68
|
-
status=
|
|
101
|
+
type=self.headers.x_ws_bom_msg_type if self.headers and self.headers.x_ws_bom_msg_type else self._get_coroutine_name(self.coroutine) if self.coroutine else None,
|
|
102
|
+
status="pending",
|
|
69
103
|
metadata=TaskMetaData(
|
|
70
|
-
|
|
71
|
-
source=self._get_coroutine_name(
|
|
72
|
-
pid=os.getpid()
|
|
104
|
+
created_at=str(datetime.now().isoformat()),
|
|
105
|
+
source=self._get_coroutine_name(self.coroutine) if self.coroutine else None,
|
|
106
|
+
pid=os.getpid(),
|
|
107
|
+
extra=_metadata_extra(self.headers.x_ws_bom_msg_extra) if self.headers and self.headers.x_ws_bom_msg_extra else None
|
|
108
|
+
)
|
|
73
109
|
)
|
|
74
110
|
model_config = ConfigDict(
|
|
75
111
|
arbitrary_types_allowed=True,
|
|
76
112
|
validate_assignment=True
|
|
77
113
|
)
|
|
78
114
|
|
|
79
|
-
class
|
|
80
|
-
|
|
81
|
-
|
|
115
|
+
class TaskStatistics(BaseModel):
|
|
116
|
+
class TaskStatisticExecutionInfo(BaseModel):
|
|
117
|
+
retention_days: float = config.robot_task_retention_days
|
|
118
|
+
max_concurrent: int
|
|
119
|
+
running: list[TaskStatus]
|
|
120
|
+
slowest: list
|
|
121
|
+
class TaskStatisticExecutionTime(BaseModel):
|
|
122
|
+
min: str
|
|
123
|
+
max: str
|
|
124
|
+
avg: str
|
|
125
|
+
total: int
|
|
126
|
+
pending: int
|
|
127
|
+
completed: int
|
|
128
|
+
failure: int
|
|
129
|
+
exec_time: TaskStatisticExecutionTime
|
|
130
|
+
exec_info: TaskStatisticExecutionInfo
|
|
131
|
+
|
|
132
|
+
#endregion
|
|
133
|
+
|
|
134
|
+
#region interface
|
|
135
|
+
class TaskManagerStrategy(ABC):
    """Abstract task manager.

    Schedules coroutines on the running event loop with semaphore-bounded
    concurrency; concrete strategies decide where task status is stored
    (in memory, database, ...). The default concurrency is derived from the
    configured maximum divided across worker processes.
    """

    def __init__(self, max_concurrent_tasks: int = floor(2 * config.robot_task_max_concurrent / config.runtime_options().number_of_workers)):
        # NOTE: the default above is evaluated once, at class-definition time
        self.max_concurrent_tasks = max_concurrent_tasks
        self.semaphore = asyncio.Semaphore(self.max_concurrent_tasks)
        # tasks currently holding the semaphore, keyed by task id
        self.running_tasks = dict[str, TaskEntry]()
        self.loop = asyncio.get_event_loop()

    @abstractmethod
    def create_task(self, coroutine, headers: TaskHeader | None = None) -> IdentifiableEntity:
        """Register and start a coroutine; return its identifier."""
        pass

    @abstractmethod
    def update_task_status(self, task: TaskEntry) -> None:
        """Hook for additional behavior, such as persisting the task status."""
        pass

    @abstractmethod
    def get_task(self, id: str) -> TaskStatus | None:
        """Return the status of one task, or None when unknown."""
        pass

    @abstractmethod
    def get_tasks(self) -> list[TaskStatus]:
        """Return the statuses of all known tasks."""
        pass

    @abstractmethod
    def remove_task(self, id: str) -> None:
        """Remove one task entry from the store."""
        pass

    @abstractmethod
    def cleanup_task(self) -> None:
        """Remove finished tasks past retention (see task_cleanup_rule)."""
        pass

    # FIX: the original also declared `stats()` as @abstractmethod here, but the
    # concrete stats() defined later in this same class body shadowed that
    # declaration entirely, so it was dead code; it has been removed
    # (behavior unchanged — subclasses always inherited the concrete stats()).

    def task_cleanup_rule(self, task: TaskEntry) -> bool:
        """True when a finished task's end time is past the retention window."""
        return task.status.status in {"completed", "failure"} and datetime.fromisoformat(task.status.metadata.end_at) < datetime.now() - timedelta(days=config.robot_task_retention_days)

    def task_done_callback(self, task_entry: TaskEntry, headers: TaskHeader | None = None) -> callable:
        """Build the done-callback that records the outcome and notifies webhooks."""
        def callback(task: asyncio.Task):
            try:
                result = task_entry.task.result()
                task_entry.status.status = "completed"
                task_entry.status.result = result
            except Exception as e:
                task_entry.status.status = "failure"
                task_entry.status.error = str(e)
            finally:
                task_entry.status.metadata.end_at = str(datetime.now().isoformat())
                #strategy-specific behavior
                self.update_task_status(task_entry)
                #notify webhooks
                if headers and headers.x_ws_bom_webhooks:
                    asyncio.create_task(
                        WebhookNotifier().notify_webhook(task_entry.status, headers.x_ws_bom_webhooks)
                    )
        return callback

    def create_task_entry(self, coroutine: asyncio.coroutines, headers: TaskHeader | None = None) -> TaskEntry:
        """Create a TaskEntry (honoring a caller-supplied id header) and schedule it."""
        _id = headers and headers.x_ws_bom_msg_id or str(uuid4())
        task = TaskEntry(
            id=_id,
            coroutine=coroutine,
            headers=headers)
        self.loop.create_task(self._run_task_with_semaphore(task))  # run the task
        return task

    async def _run_task_with_semaphore(self, task_entry: TaskEntry):
        """Run a task with semaphore control to limit concurrency."""
        async with self.semaphore:
            self.running_tasks[task_entry.id] = task_entry
            try:
                await self._execute_task(task_entry)
            finally:
                # FIX: always drop the bookkeeping entry; previously a failing
                # task skipped the del, leaking the entry in running_tasks
                del self.running_tasks[task_entry.id]

    async def _execute_task(self, task_entry: TaskEntry):
        """Execute a task and handle its lifecycle."""
        task_entry.status.metadata.start_at = str(datetime.now().isoformat())
        task_entry.task = asyncio.create_task(task_entry.coroutine)
        task_entry.task.add_done_callback(self.task_done_callback(task_entry, task_entry.headers))
        try:
            await task_entry.task
        except Exception:
            # FIX: the failure is already recorded by the done-callback;
            # swallowing here prevents "Task exception was never retrieved"
            # on the fire-and-forget wrapper. CancelledError still propagates.
            pass

    def running_task(self):
        """Entries of tasks currently holding the semaphore."""
        return self.running_tasks.values()

    def stats(self) -> TaskStatistics:
        """Aggregate counters and elapsed-time statistics over all known tasks."""
        def __string_to_timedelta(value: str) -> timedelta:
            # elapsed_time is formatted HH:MM:SS with optional .microseconds
            if "." in value:
                time_format = "%H:%M:%S.%f"
            else:
                time_format = "%H:%M:%S"
            time_obj = datetime.strptime(value, time_format)
            return timedelta(hours=time_obj.hour, minutes=time_obj.minute, seconds=time_obj.second, microseconds=time_obj.microsecond)
        def __timedelta_to_string(td):
            hours, remainder = divmod(td.total_seconds(), 3600)
            minutes, seconds = divmod(remainder, 60)
            return f"{int(hours):02d}:{int(minutes):02d}:{int(seconds):02d}.{td.microseconds}"
        _all = self.get_tasks()
        _not_pending = [task for task in _all if task.status != "pending"] if _all else []
        # FIX: simplified the redundant `x and len(x) if x else 0` expression
        _total_not_pending = len(_not_pending)
        elapsed_times = [__string_to_timedelta(task.metadata.elapsed_time) for task in _not_pending]
        _avg_exec_time = sum(elapsed_times, timedelta()) / _total_not_pending if elapsed_times else timedelta()
        _min_exec_time = min(elapsed_times) if elapsed_times else timedelta()
        _max_exec_time = max(elapsed_times) if elapsed_times else timedelta()
        _slowest: list[TaskStatus] = sorted(_not_pending, key=lambda x: __string_to_timedelta(x.metadata.elapsed_time), reverse=True)[:3]
        return TaskStatistics(
            total=_all and len(_all) or 0,
            pending=_all and len([task for task in _all if task.status == "pending"]) or 0,
            completed=_all and len([task for task in _all if task.status == "completed"]) or 0,
            failure=_all and len([task for task in _all if task.status == "failure"]) or 0,
            exec_time=TaskStatistics.TaskStatisticExecutionTime(
                min=__timedelta_to_string(_min_exec_time),
                max=__timedelta_to_string(_max_exec_time),
                avg=__timedelta_to_string(_avg_exec_time)
            ),
            exec_info=TaskStatistics.TaskStatisticExecutionInfo(
                retention_days=config.robot_task_retention_days,
                max_concurrent=self.max_concurrent_tasks,
                running=[task.status for task in self.running_task()],
                slowest=_slowest
            )
        )
|
|
256
|
+
|
|
257
|
+
#endregion
|
|
258
|
+
|
|
259
|
+
#memory implementation
|
|
260
|
+
class MemoryTaskManagerStrategy(TaskManagerStrategy):
    """In-process task manager: entries live in a plain dict.

    Suitable only for a single worker process, since the store is not shared.
    """
    def __init__(self):
        super().__init__()
        # id -> TaskEntry: the single source of truth for this strategy
        self.tasks: Dict[str, TaskEntry] = {}

    def create_task(self, coroutine: asyncio.coroutines, headers: TaskHeader | None = None) -> IdentifiableEntity:
        """Schedule the coroutine and keep its entry in memory."""
        task = self.create_task_entry(coroutine, headers)
        self.tasks[task.id] = task
        return IdentifiableEntity(id=task.id)

    def update_task_status(self, task: TaskEntry) -> None:
        """no-op for memory strategy."""
        pass

    def get_task(self, id: str) -> TaskStatus | None:
        """Status of one task, or None when unknown."""
        if _task := self.tasks.get(id):
            return _task.status
        return None

    def get_tasks(self) -> list[TaskStatus]:
        # FIX: annotation tightened from `list[TaskStatus] | None` to match the
        # base-class contract — a list is always returned here.
        return [task.status for task in self.tasks.values()]

    def remove_task(self, id: str) -> None:
        """Forget one task entry, silently ignoring unknown ids."""
        if id in self.tasks:
            del self.tasks[id]

    def cleanup_task(self):
        """Drop finished tasks past the retention window (task_cleanup_rule)."""
        # collect ids first to avoid mutating the dict while iterating it
        keys = [task.id for task in self.tasks.values() if self.task_cleanup_rule(task)]
        for key in keys:
            self.remove_task(key)
|
|
290
|
+
|
|
291
|
+
#endregion
|
|
292
|
+
|
|
293
|
+
#db implementation
|
|
294
|
+
# SQLAlchemy declarative base for the task store tables
Base = registry().generate_base()
class TaskEntryModel(Base):
    """SQLAlchemy row mapping a task id to its serialized TaskStatus JSON."""
    __tablename__ = "entry"
    id = Column(String, primary_key=True)
    # full TaskStatus payload, stored as a JSON column (see model_dump() callers)
    status = Column(JSON)
    # NOTE(review): ConfigDict is a Pydantic construct; on a SQLAlchemy
    # declarative model it appears to be an inert class attribute — confirm
    # whether it can be removed.
    model_config = ConfigDict(
        arbitrary_types_allowed=True
    )
|
|
302
|
+
class DatabaseTaskManagerStrategy(TaskManagerStrategy):
    """Task manager persisting status rows to a database (SQLite by default),
    so multiple worker processes can share one task store."""
    def __init__(self, db_url: str = "sqlite:///.data/db/tasks.sqlite"):
        super().__init__()
        self.engine = create_engine(db_url)
        self.Session = sessionmaker(bind=self.engine)
        # create the "entry" table on first use if missing
        Base.metadata.create_all(self.engine)

    def create_task(self, coroutine: asyncio.coroutines, headers: TaskHeader | None = None) -> IdentifiableEntity:
        """Schedule the coroutine and persist its initial status row."""
        task = self.create_task_entry(coroutine, headers)
        with self.Session() as session:
            session.add(TaskEntryModel(id=task.id, status=task.status.model_dump()))
            session.commit()
        return IdentifiableEntity(id=task.id)

    def update_task_status(self, task: TaskEntry) -> None:
        # invoked from the done-callback: overwrite the persisted status JSON
        with self.Session() as session:
            session.query(TaskEntryModel).filter_by(id=task.id).update(
                {"status": task.status.model_dump()}
            )
            session.commit()

    def get_task(self, id: str) -> TaskStatus | None:
        """Load one row and rebuild its TaskStatus, or None when unknown."""
        with self.Session() as session:
            task = session.query(TaskEntryModel).filter_by(id=id).first()
            if task:
                # rebuilds a TaskEntry from the raw row dict; presumably the
                # model ignores SQLAlchemy internals such as _sa_instance_state
                # — TODO confirm extra-field handling on TaskEntry
                return TaskEntry(**task.__dict__).status
        return None

    def get_tasks(self) -> list[TaskStatus]:
        """Load all rows and rebuild their TaskStatus objects."""
        with self.Session() as session:
            tasks = session.query(TaskEntryModel).all()
            if tasks:
                return [TaskEntry(**task.__dict__).status for task in tasks]
        return []

    def remove_task(self, id: str) -> None:
        """Delete one row by id; no-op when the id does not exist."""
        with self.Session() as session:
            session.query(TaskEntryModel).filter_by(id=id).delete()
            session.commit()

    def cleanup_task(self):
        """Delete rows for finished tasks past retention (task_cleanup_rule)."""
        with self.Session() as session:
            for task in session.query(TaskEntryModel).all():
                _task = TaskEntry(**task.__dict__)
                if self.task_cleanup_rule(_task):
                    session.query(TaskEntryModel).filter_by(id=task.id).delete()
                    # commit per deletion keeps each removal independent
                    session.commit()
    #endregion
|
|
349
|
+
#endregion
|
|
125
350
|
|
|
126
351
|
# global instance
def __get_taskmanager_strategy() -> TaskManagerStrategy:
    """Pick the task-manager backend: DB-backed when several worker processes
    must share state, otherwise the simpler in-memory store."""
    _multi = config.runtime_options().is_multi_process
    return DatabaseTaskManagerStrategy() if _multi else MemoryTaskManagerStrategy()
task_manager = __get_taskmanager_strategy()
_log.info(f"Task manager strategy: {task_manager.__class__.__name__}")
|
|
128
358
|
|
|
359
|
+
#region api
router = APIRouter(prefix="/api/task", tags=["task"])

@router.get("/status/{id}")
async def _status_task(id: str) -> TaskStatus:
    """Return the status of a single task; 404 when it is unknown."""
    if not (found := task_manager.get_task(id)):
        raise HTTPException(status_code=404, detail="Task not found")
    return found

@router.get("/status")
async def _status_task_list() -> list[TaskStatus]:
    """Return the statuses of every known task."""
    return task_manager.get_tasks()

@router.delete("/status/{id}")
async def _remove_task(id: str):
    """Remove a single task entry from the store."""
    task_manager.remove_task(id)
    return {"success":"ok"}

@router.delete("/cleanup")
async def _remove_task_list():
    """Purge finished tasks that are past the retention window."""
    task_manager.cleanup_task()
    return {"success":"ok"}

@router.get("/stats")
async def _stats() -> TaskStatistics:
    """Return aggregate execution statistics for the task manager."""
    return task_manager.stats()
|
|
386
|
+
|
|
387
|
+
#endregion
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: ws_bom_robot_app
|
|
3
|
-
Version: 0.0.
|
|
3
|
+
Version: 0.0.12
|
|
4
4
|
Summary: A FastAPI application serving ws bom/robot/llm platform ai.
|
|
5
5
|
Home-page: https://github.com/websolutespa/bom
|
|
6
6
|
Author: Websolute Spa
|
|
@@ -11,7 +11,7 @@ Classifier: Operating System :: OS Independent
|
|
|
11
11
|
Requires-Python: >=3.12
|
|
12
12
|
Description-Content-Type: text/markdown
|
|
13
13
|
Requires-Dist: standardwebhooks==1.0.0
|
|
14
|
-
Requires-Dist:
|
|
14
|
+
Requires-Dist: apscheduler==3.11.0
|
|
15
15
|
Requires-Dist: aiofiles==24.1.0
|
|
16
16
|
Requires-Dist: pydantic==2.9.2
|
|
17
17
|
Requires-Dist: pydantic-settings==2.6.0
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
ws_bom_robot_app/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
2
|
ws_bom_robot_app/auth.py,sha256=84nIbmJsMrNs0sxIQGEHbjsjc2P6ZrZZGSn8dkiL6is,895
|
|
3
|
-
ws_bom_robot_app/config.py,sha256=
|
|
4
|
-
ws_bom_robot_app/cron_manager.py,sha256=
|
|
5
|
-
ws_bom_robot_app/main.py,sha256=
|
|
6
|
-
ws_bom_robot_app/task_manager.py,sha256=
|
|
3
|
+
ws_bom_robot_app/config.py,sha256=6elpogZZW8QXD7oJX4DhYTLqsVFHq2wMcR1fz1X4qdo,3157
|
|
4
|
+
ws_bom_robot_app/cron_manager.py,sha256=0Yt5AMTPGlXZ_M5ck0SKMX8wvzoPsseEezg_s0Q3HKY,9224
|
|
5
|
+
ws_bom_robot_app/main.py,sha256=MIR2WgxX9HwkNSY2JMRRxrLt-ZGs_TfrhX_BbTbigTI,3909
|
|
6
|
+
ws_bom_robot_app/task_manager.py,sha256=7rPFvYzaVL4DSKjLXCX5SXA3mN5qJ7OwfLS0-hvvRwE,15885
|
|
7
7
|
ws_bom_robot_app/util.py,sha256=3aBK-bhsvKJwJeWOHh0c1B1BOyJ_tnUxOa1mJmFKwYQ,2618
|
|
8
8
|
ws_bom_robot_app/llm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
9
|
ws_bom_robot_app/llm/agent_description.py,sha256=SDJYMmwfdMxEK3a_HDEQ19bfNKmwMSFf5hqU0VSCCIE,4705
|
|
@@ -14,9 +14,9 @@ ws_bom_robot_app/llm/defaut_prompt.py,sha256=pn5a4lNLWE1NngHYjA_7tD8GasePMgsgude
|
|
|
14
14
|
ws_bom_robot_app/llm/main.py,sha256=HWTaAmxZpVo9HEc2cfv9DqA3tlaVGl1NdBONgEI16ds,3595
|
|
15
15
|
ws_bom_robot_app/llm/settings.py,sha256=EkFGCppORenStH9W4e6_dYvQ-5p6xiEMpmUHBqNqG9M,117
|
|
16
16
|
ws_bom_robot_app/llm/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
17
|
-
ws_bom_robot_app/llm/models/api.py,sha256=
|
|
17
|
+
ws_bom_robot_app/llm/models/api.py,sha256=KlVUbApyz6uuWefAN9K4B_vWDSps5hLW6hNg1Eo3TBQ,6996
|
|
18
18
|
ws_bom_robot_app/llm/models/base.py,sha256=1TqxuTK3rjJEALn7lvgoen_1ba3R2brAgGx6EDTtDZo,152
|
|
19
|
-
ws_bom_robot_app/llm/models/kb.py,sha256=
|
|
19
|
+
ws_bom_robot_app/llm/models/kb.py,sha256=9zqwDlVULVrWE48wo5AivzWoOtnjA57k9rsw8KNnyDk,8935
|
|
20
20
|
ws_bom_robot_app/llm/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
21
21
|
ws_bom_robot_app/llm/tools/tool_builder.py,sha256=rkYu0PrXV84PMi7INjCSWlrWMykUCI8aeF-QjZgLysM,854
|
|
22
22
|
ws_bom_robot_app/llm/tools/tool_manager.py,sha256=y4K1NiDsVbdZjk2xUEK_T6j-4fHmV5QY02j5tHcoBRs,3708
|
|
@@ -29,17 +29,17 @@ ws_bom_robot_app/llm/utils/download.py,sha256=iAUxH_NiCpTPtGzhC4hBtxotd2HPFt2MBh
|
|
|
29
29
|
ws_bom_robot_app/llm/utils/faiss_helper.py,sha256=DowmroVT6eIbvnA-TG84PS_D7ujvxSRIKdLuIcJmd6Q,4650
|
|
30
30
|
ws_bom_robot_app/llm/utils/kb.py,sha256=jja45WCbNI7SGEgqDS99nErlwB5eY8Ga7BMnhdMHZ90,1279
|
|
31
31
|
ws_bom_robot_app/llm/utils/print.py,sha256=bpLWY0KHXe7x7PWcWy8NS54ZWzHY8b4jrLRkpnDl108,818
|
|
32
|
-
ws_bom_robot_app/llm/utils/webhooks.py,sha256=
|
|
32
|
+
ws_bom_robot_app/llm/utils/webhooks.py,sha256=LAAZqyN6VhV13wu4X-X85TwdDgAV2rNvIwQFIIc0FJM,2114
|
|
33
33
|
ws_bom_robot_app/llm/vector_store/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
34
|
-
ws_bom_robot_app/llm/vector_store/generator.py,sha256=
|
|
34
|
+
ws_bom_robot_app/llm/vector_store/generator.py,sha256=SrxrZ87JmWW4PQ-zP8upJJfamWur49fvH2eoIjEVoCI,5771
|
|
35
35
|
ws_bom_robot_app/llm/vector_store/integration/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
36
36
|
ws_bom_robot_app/llm/vector_store/integration/base.py,sha256=eCKD3U0KPoVDMtKr2iZqauMFEKd9b2k6rqPG_YjDy0g,626
|
|
37
37
|
ws_bom_robot_app/llm/vector_store/integration/manager.py,sha256=cSFlE2erMv3Uchy788mlCFdcvmyeoqdeIiGmJ9QbLhY,583
|
|
38
|
-
ws_bom_robot_app/llm/vector_store/integration/sitemap.py,sha256=
|
|
38
|
+
ws_bom_robot_app/llm/vector_store/integration/sitemap.py,sha256=nPbIywp-ZwWbWStvjvYVgHqqejyYFr8eZhBc8ycTuaU,4206
|
|
39
39
|
ws_bom_robot_app/llm/vector_store/loader/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
40
|
-
ws_bom_robot_app/llm/vector_store/loader/base.py,sha256=
|
|
41
|
-
ws_bom_robot_app/llm/vector_store/loader/json_loader.py,sha256=
|
|
42
|
-
ws_bom_robot_app-0.0.
|
|
43
|
-
ws_bom_robot_app-0.0.
|
|
44
|
-
ws_bom_robot_app-0.0.
|
|
45
|
-
ws_bom_robot_app-0.0.
|
|
40
|
+
ws_bom_robot_app/llm/vector_store/loader/base.py,sha256=dhZ7F4EJmuYa2TBMggWVpQe4_NmS2wi312lHnNm5Jm0,4571
|
|
41
|
+
ws_bom_robot_app/llm/vector_store/loader/json_loader.py,sha256=qo9ejRZyKv_k6jnGgXnu1W5uqsMMtgqK_uvPpZQ0p74,833
|
|
42
|
+
ws_bom_robot_app-0.0.12.dist-info/METADATA,sha256=2ATPWZDFjdg9vQw3MLBJOXaGUU8f1JgrrlhAOGKQpyw,5678
|
|
43
|
+
ws_bom_robot_app-0.0.12.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
|
44
|
+
ws_bom_robot_app-0.0.12.dist-info/top_level.txt,sha256=Yl0akyHVbynsBX_N7wx3H3ZTkcMLjYyLJs5zBMDAKcM,17
|
|
45
|
+
ws_bom_robot_app-0.0.12.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|