ws-bom-robot-app 0.0.10__tar.gz → 0.0.12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/PKG-INFO +2 -2
  2. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/setup.py +1 -1
  3. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/config.py +30 -1
  4. ws_bom_robot_app-0.0.12/ws_bom_robot_app/cron_manager.py +251 -0
  5. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/models/api.py +2 -2
  6. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/models/kb.py +1 -1
  7. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/utils/webhooks.py +1 -0
  8. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/vector_store/generator.py +1 -1
  9. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/vector_store/integration/sitemap.py +23 -20
  10. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/vector_store/loader/base.py +2 -1
  11. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/vector_store/loader/json_loader.py +3 -4
  12. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/main.py +13 -2
  13. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/requirements.txt +1 -1
  14. ws_bom_robot_app-0.0.12/ws_bom_robot_app/task_manager.py +387 -0
  15. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app.egg-info/PKG-INFO +2 -2
  16. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app.egg-info/requires.txt +1 -1
  17. ws_bom_robot_app-0.0.10/ws_bom_robot_app/cron_manager.py +0 -99
  18. ws_bom_robot_app-0.0.10/ws_bom_robot_app/task_manager.py +0 -151
  19. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/README.md +0 -0
  20. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/pyproject.toml +0 -0
  21. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/setup.cfg +0 -0
  22. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/__init__.py +0 -0
  23. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/auth.py +0 -0
  24. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/__init__.py +0 -0
  25. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/agent_description.py +0 -0
  26. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/agent_handler.py +0 -0
  27. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/agent_lcel.py +0 -0
  28. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/api.py +0 -0
  29. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/defaut_prompt.py +0 -0
  30. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/main.py +0 -0
  31. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/models/__init__.py +0 -0
  32. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/models/base.py +0 -0
  33. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/settings.py +0 -0
  34. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/tools/__init__.py +0 -0
  35. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/tools/models/__init__.py +0 -0
  36. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/tools/models/main.py +0 -0
  37. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/tools/tool_builder.py +0 -0
  38. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/tools/tool_manager.py +0 -0
  39. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/tools/utils.py +0 -0
  40. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/utils/__init__.py +0 -0
  41. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/utils/agent_utils.py +0 -0
  42. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/utils/download.py +0 -0
  43. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/utils/faiss_helper.py +0 -0
  44. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/utils/kb.py +0 -0
  45. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/utils/print.py +0 -0
  46. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/vector_store/__init__.py +0 -0
  47. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/vector_store/integration/__init__.py +0 -0
  48. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/vector_store/integration/base.py +0 -0
  49. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/vector_store/integration/manager.py +0 -0
  50. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/llm/vector_store/loader/__init__.py +0 -0
  51. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app/util.py +0 -0
  52. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app.egg-info/SOURCES.txt +0 -0
  53. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app.egg-info/dependency_links.txt +0 -0
  54. {ws_bom_robot_app-0.0.10 → ws_bom_robot_app-0.0.12}/ws_bom_robot_app.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ws_bom_robot_app
3
- Version: 0.0.10
3
+ Version: 0.0.12
4
4
  Summary: A FastAPI application serving ws bom/robot/llm platform ai.
5
5
  Home-page: https://github.com/websolutespa/bom
6
6
  Author: Websolute Spa
@@ -11,7 +11,7 @@ Classifier: Operating System :: OS Independent
11
11
  Requires-Python: >=3.12
12
12
  Description-Content-Type: text/markdown
13
13
  Requires-Dist: standardwebhooks==1.0.0
14
- Requires-Dist: schedule==1.2.2
14
+ Requires-Dist: apscheduler==3.11.0
15
15
  Requires-Dist: aiofiles==24.1.0
16
16
  Requires-Dist: pydantic==2.9.2
17
17
  Requires-Dist: pydantic-settings==2.6.0
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
2
2
 
3
3
  setup(
4
4
  name="ws_bom_robot_app",
5
- version="0.0.10",
5
+ version="0.0.12",
6
6
  description="A FastAPI application serving ws bom/robot/llm platform ai.",
7
7
  long_description=open("README.md", encoding='utf-8').read(),
8
8
  long_description_content_type="text/markdown",
@@ -1,6 +1,7 @@
1
+ from typing import Optional
1
2
  from pydantic import BaseModel, ConfigDict
2
3
  from pydantic_settings import BaseSettings
3
-
4
+ import os
4
5
 
5
6
  class Settings(BaseSettings):
6
7
  robot_env: str = 'local'
@@ -12,6 +13,7 @@ class Settings(BaseSettings):
12
13
  robot_data_db_folder_out: str = 'out'
13
14
  robot_data_db_folder_store: str = 'store'
14
15
  robot_data_db_retention_days: float = 60
16
+ robot_task_max_concurrent: int = os.cpu_count() or 1
15
17
  robot_task_retention_days: float = 1
16
18
  robot_cms_host: str = ''
17
19
  robot_cms_auth: str = ''
@@ -25,10 +27,37 @@ class Settings(BaseSettings):
25
27
  )
26
28
 
27
29
  class RuntimeOptions(BaseModel):
30
+ @staticmethod
31
+ def _get_number_of_workers() -> int:
32
+ """
33
+ Returns the number of worker processes to use for the application.
34
+
35
+ This function inspects the command-line arguments to determine the number
36
+ of worker processes to use. It looks for the "--workers" argument and
37
+ returns the subsequent value as an integer.
38
+ Sample of command-line arguments:
39
+ fastapi dev main.py --port 6001
40
+ fastapi run main.py --port 6001 --workers 4
41
+ uvicorn main:app --port 6001 --workers 4
42
+
43
+ Returns:
44
+ Optional[int]: The number of worker processes to use, or 1 if
45
+ the argument is not found or the value is invalid.
46
+ """
47
+ import sys
48
+ try:
49
+ for i, arg in enumerate(sys.argv):
50
+ if arg == "--workers" and i + 1 < len(sys.argv):
51
+ return int(sys.argv[i + 1])
52
+ except (ValueError, IndexError):
53
+ pass
54
+ return 1
28
55
  debug: bool
29
56
  loader_strategy: str
30
57
  loader_show_progress: bool
31
58
  loader_silent_errors: bool
59
+ number_of_workers: int = _get_number_of_workers()
60
+ is_multi_process: bool = _get_number_of_workers() > 1
32
61
 
33
62
 
34
63
  def runtime_options(self) -> RuntimeOptions:
@@ -0,0 +1,251 @@
1
+ from apscheduler.schedulers.background import BackgroundScheduler
2
+ #from apscheduler.schedulers.asyncio import AsyncIOScheduler
3
+ from apscheduler.jobstores.memory import MemoryJobStore
4
+ from apscheduler.jobstores.sqlalchemy import SQLAlchemyJobStore
5
+ from apscheduler.triggers.cron import CronTrigger
6
+ from apscheduler.triggers.interval import IntervalTrigger
7
+ from apscheduler.triggers.date import DateTrigger
8
+ from fastapi import APIRouter
9
+ from datetime import datetime
10
+ from ws_bom_robot_app.task_manager import task_manager
11
+ from ws_bom_robot_app.llm.utils.kb import kb_cleanup_data_file
12
+ from ws_bom_robot_app.util import _log
13
+ from ws_bom_robot_app.config import config
14
+
15
class JobstoreStrategy:
    """Abstract strategy that supplies the APScheduler jobstore configuration."""

    def get_jobstore(self):
        """Return a mapping of jobstore aliases to jobstore instances."""
        raise NotImplementedError("Subclasses should implement this method")
18
+
19
class MemoryJobstoreStrategy(JobstoreStrategy):
    """Jobstore strategy backed by a non-persistent, in-process memory store."""

    def get_jobstore(self):
        """Return the default jobstore mapping using an in-memory store."""
        _log.info("Using in-memory cron jobstore.")
        jobstores = {"default": MemoryJobStore()}
        return jobstores
23
+
24
class PersistentJobstoreStrategy(JobstoreStrategy):
    """Jobstore strategy backed by a SQLAlchemy store (SQLite file by default).

    Jobs persisted here survive process restarts; note that sharing the same
    database between multiple scheduler processes is not coordinated.
    """

    def get_jobstore(self, db_url: str = "sqlite:///.data/db/jobs.sqlite"):
        """Return the default jobstore mapping persisted at *db_url*.

        Args:
            db_url: SQLAlchemy database URL for the job store.
        """
        # fix: log message previously read "crob" instead of "cron"
        _log.info(f"Using persistent cron jobstore with database URL: {db_url}.")
        return {"default": SQLAlchemyJobStore(url=db_url)}
28
+
29
class Job:
    """Schedulable unit supporting cron, interval and one-time (date) triggers."""

    def __init__(self, name: str, job_func, args: list = None, kwargs: dict = None, cron_expression: str = None, interval: int = None, run_at: datetime = None):
        """
        Job that supports cron-style, fixed-interval and one-time scheduling.

        :param name: Unique identifier of the job.
        :param job_func: The function to execute.
        :param args: Positional arguments passed to ``job_func``.
        :param kwargs: Keyword arguments passed to ``job_func``.
        :param cron_expression: Crontab expression for cron-style recurring jobs.
        :param interval: Interval in seconds for recurring jobs.
        :param run_at: Specific datetime for one-time jobs.
        :raises ValueError: If none of the scheduling options is provided.
        """
        if not (cron_expression or interval or run_at):
            # fix: message now lists every accepted scheduling option
            raise ValueError("One of 'cron_expression', 'interval' or 'run_at' must be provided.")
        self.name = name
        self.job_func = job_func
        self.args: list = args or []
        self.kwargs: dict = kwargs or {}
        self.cron_expression = cron_expression
        self.interval = interval
        self.run_at = run_at

    def create_trigger(self):
        """Create the appropriate APScheduler trigger for this job.

        Precedence: cron expression, then interval, then one-time date
        (the constructor guarantees at least one of them is set).
        """
        if self.cron_expression:
            return CronTrigger.from_crontab(self.cron_expression)
        if self.interval:
            return IntervalTrigger(seconds=self.interval)
        return DateTrigger(run_date=self.run_at)
56
+
57
class CronManager:
    """Thin wrapper around APScheduler's BackgroundScheduler.

    Manages named jobs and optionally registers a default set of
    maintenance jobs (task cleanup and knowledge-base data cleanup).
    """

    # Default maintenance jobs registered by start() when enable_defaults is True.
    _list_default = [
        Job('cleanup-task', task_manager.cleanup_task, interval=5 * 60),
        Job('cleanup-data', kb_cleanup_data_file, interval=180 * 60),
    ]

    @staticmethod
    def __get_jobstore_strategy() -> JobstoreStrategy:
        # fix: removed dead "if True or config.runtime_options().is_multi_process"
        # debug leftover. The in-memory store is deliberately forced for now:
        # a persistent (SQLAlchemy) store is unsafe when several worker
        # processes each run their own scheduler against the same database.
        return MemoryJobstoreStrategy()

    def __init__(self, strategy: JobstoreStrategy = None, enable_defaults: bool = True):
        """
        :param strategy: Jobstore strategy; defaults to the strategy chosen
            by ``__get_jobstore_strategy()``.
        :param enable_defaults: Register the default maintenance jobs on start().
        """
        self.enable_defaults = enable_defaults
        if strategy is None:
            strategy = CronManager.__get_jobstore_strategy()
        jobstores = strategy.get_jobstore()
        self.scheduler: BackgroundScheduler = BackgroundScheduler(jobstores=jobstores)
        self.__scheduler_is_running = False

    def add_job(self, job: Job):
        """
        Add a job to the scheduler; skip creation if a job with the same
        name already exists.

        Args:
            job (Job): Job definition (function, arguments and trigger spec).
        Sample usage:
          recurring_job = Job(name="sample-recurring-job", job_func=example_job, interval=5, args=args, kwargs=kwargs)
          cron_manager.add_job(recurring_job)
          fire_once_job = Job(name="sample-fire-once-job", job_func=example_job, run_at=datetime.now(), args=args, kwargs=kwargs)
          cron_manager.add_job(fire_once_job)
        """
        if self.scheduler.get_job(job.name):
            _log.info(f"Job with name '{job.name}' already exists. Skip creation.")
        else:
            self.scheduler.add_job(
                func=job.job_func,
                args=job.args,
                kwargs=job.kwargs,
                trigger=job.create_trigger(),
                id=job.name,
                name=job.name,
                replace_existing=True
            )

    def start(self):
        """Start the scheduler (idempotent) and register the default jobs."""
        if not self.__scheduler_is_running:
            self.__scheduler_is_running = True
            self.scheduler.start()
        if self.enable_defaults:
            # add_job already skips jobs that are registered, so no extra
            # existence check is needed here.
            for job in CronManager._list_default:
                self.add_job(job)

    def get_job(self, job_id: str):
        return self.scheduler.get_job(job_id)

    def get_jobs(self):
        return self.scheduler.get_jobs()

    def execute_job(self, job_id: str):
        """Run a job's function immediately, outside of its schedule.

        :raises ValueError: If no job with the given id exists.
        """
        job = self.scheduler.get_job(job_id)
        if job:
            # fix: pass the job's stored arguments instead of calling bare func()
            job.func(*job.args, **job.kwargs)
        else:
            raise ValueError(f"Job with id '{job_id}' not found.")

    def pause_job(self, job_id: str):
        self.scheduler.pause_job(job_id)

    def resume_job(self, job_id: str):
        self.scheduler.resume_job(job_id)

    def remove_job(self, job_id: str):
        self.scheduler.remove_job(job_id)

    def _recurring_jobs(self):
        """Return scheduled jobs driven by an interval trigger.

        fix: scheduled APScheduler jobs expose no 'interval' attribute; the
        previous checks on job.interval raised AttributeError. The trigger
        type is inspected instead.
        """
        return [job for job in self.scheduler.get_jobs() if isinstance(job.trigger, IntervalTrigger)]

    def execute_recurring_jobs(self):
        """Run every interval-based job immediately."""
        for job in self._recurring_jobs():
            # fix: scheduled jobs expose 'func', not 'job_func'
            job.func(*job.args, **job.kwargs)

    def pause_recurring_jobs(self):
        for job in self._recurring_jobs():
            self.pause_job(job.id)

    def resume_recurring_jobs(self):
        for job in self._recurring_jobs():
            self.resume_job(job.id)

    def remove_recurring_jobs(self):
        for job in self._recurring_jobs():
            self.remove_job(job.id)

    def clear(self):
        """Remove all jobs and allow start() to run the startup path again.

        NOTE(review): the scheduler thread itself keeps running; only the
        jobs are removed — confirm this is the intended "stop" semantics.
        """
        self.__scheduler_is_running = False
        self.scheduler.remove_all_jobs()

    def shutdown(self):
        """Shut the scheduler down permanently."""
        self.scheduler.shutdown()
166
+
167
# Module-level singleton used by the API routes below.
cron_manager = CronManager()

# FastAPI Routes
router = APIRouter(prefix="/api/cron", tags=["cron"])

@router.get("/list")
def _list():
    """Describe every job currently known to the scheduler."""
    return [
        {
            "id": job.id,
            "name": job.name,
            "func": job.func_ref,
            "pending": job.pending,
            "trigger": str(job.trigger),
            "next_run_time": job.next_run_time
        }
        for job in cron_manager.get_jobs()
    ]

@router.get("/default-jobs")
def _default_jobs():
    """Report whether each default job is currently registered."""
    report = []
    for job in CronManager._list_default:
        registered = cron_manager.scheduler.get_job(job.name)
        report.append({
            "name": job.name,
            "status": "exists" if registered else "not added"
        })
    return report

@router.post("/execute-job/{job_id}")
def _execute_job(job_id: str):
    """Run a single job immediately; report an error for unknown ids."""
    try:
        cron_manager.execute_job(job_id)
    except ValueError as e:
        return {"error": str(e)}
    return {"status": f"Job {job_id} executed"}

@router.post("/pause-job/{job_id}")
def _pause_job(job_id: str):
    """Pause one job by id."""
    cron_manager.pause_job(job_id)
    return {"status": f"Job {job_id} paused"}

@router.post("/resume-job/{job_id}")
def _resume_job(job_id: str):
    """Resume one paused job by id."""
    cron_manager.resume_job(job_id)
    return {"status": f"Job {job_id} resumed"}

@router.delete("/remove-job/{job_id}")
def _remove_job(job_id: str):
    """Remove one job by id."""
    cron_manager.remove_job(job_id)
    return {"status": f"Job {job_id} removed"}

@router.post("/execute-recurring")
def _execute_recurring():
    """Run all interval-based jobs immediately."""
    cron_manager.execute_recurring_jobs()
    return {"status": "All recurring jobs executed"}

@router.post("/pause-recurring")
def _pause_recurring():
    """Pause all interval-based jobs."""
    cron_manager.pause_recurring_jobs()
    return {"status": "All recurring jobs paused"}

@router.post("/resume-recurring")
def _resume_recurring():
    """Resume all interval-based jobs."""
    cron_manager.resume_recurring_jobs()
    return {"status": "All recurring jobs resumed"}

@router.delete("/remove-recurring")
def _remove_recurring():
    """Remove all interval-based jobs."""
    cron_manager.remove_recurring_jobs()
    return {"status": "All recurring jobs removed"}

@router.get("/start")
def _start():
    """Start the scheduler and register default jobs."""
    cron_manager.start()
    return {"status": "started"}

@router.delete("/stop")
def _stop():
    """Remove all jobs (scheduler thread keeps running)."""
    cron_manager.clear()
    return {"status": "stopped"}

@router.get("/shutdown")
def _shutdown():
    """Shut the scheduler down permanently."""
    cron_manager.shutdown()
    return {"status": "shutdown"}
@@ -1,4 +1,4 @@
1
- from typing import List, Dict, Optional
1
+ from typing import List, Dict, Optional, Union
2
2
  from datetime import datetime
3
3
  from pydantic import AliasChoices, BaseModel, Field, ConfigDict
4
4
  from ws_bom_robot_app.llm.models.kb import LlmKbEndpoint, LlmKbIntegration
@@ -125,7 +125,7 @@ class VectorDbRequest(BaseModel):
125
125
  def api_key(self):
126
126
  return self.secrets.get("openAIApiKey", "")
127
127
  def out_name(self):
128
- return "vector_db_" + datetime.now().strftime("%Y-%m-%d_%H-%M-%S-%f")[:-3]
128
+ return f"db_{datetime.now().strftime("%Y-%m-%d_%H-%M-%S-%f")[:-3]}_{os.getpid()}"
129
129
 
130
130
  class RulesRequest(VectorDbRequest):
131
131
  type: Optional[str] = 'rules'
@@ -144,7 +144,7 @@ async def load_endpoints(endpoints: list[LlmKbEndpoint], destination_directory:
144
144
  documents = await JsonLoader(
145
145
  file_path,
146
146
  meta_fields=[field.name for field in endpoint.fields_mapping.meta_fields] if endpoint.fields_mapping.meta_fields else []
147
- ).load()
147
+ ).aload()
148
148
  _documents.extend(documents)
149
149
  await aiofiles.os.remove(file_path)
150
150
  except Exception as e:
@@ -51,3 +51,4 @@ class WebhookNotifier:
51
51
  async with httpx.AsyncClient(headers=_headers,verify=False,timeout=timeout) as client:
52
52
  response = await client.post(endpoint, data=_data)
53
53
  response.raise_for_status()
54
+
@@ -9,7 +9,7 @@ from ws_bom_robot_app.llm.vector_store.integration.manager import IntegrationMan
9
9
  from ws_bom_robot_app.llm.utils.faiss_helper import FaissHelper
10
10
  from ws_bom_robot_app.util import timer
11
11
 
12
- @timer
12
+ #@timer
13
13
  async def rules(rq: RulesRequest) -> VectorDbResponse:
14
14
  api_key = rq.api_key()
15
15
  _config = rq.config()
@@ -1,3 +1,4 @@
1
+ from typing import Any
1
2
  import aiofiles
2
3
  import aiofiles.os
3
4
  from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy
@@ -5,23 +6,21 @@ from langchain_community.document_loaders.sitemap import SitemapLoader
5
6
  from langchain_community.document_transformers import MarkdownifyTransformer as markdownify
6
7
  from langchain_core.documents import Document
7
8
  from bs4 import BeautifulSoup, Tag
8
- import nest_asyncio, os
9
-
10
9
 
11
10
  class Sitemap(IntegrationStrategy):
12
- """_summary_
11
+ """Class to load a sitemap.xml file and extract text from the URLs.
13
12
  Load a sitemap.xml file and extract text from the urls.
14
13
  Args:
15
14
  data (dict[str, str]):
16
15
  data["sitemapUrl"] (str): absolute/relative url of the sitemap.xml
17
16
  data["outputFormat"] (str): ["text", "html", "markdown"] default to "text"
18
- data["filterUrls"] list: list of regex pattern to filter urls ["https://www.example.com/en/products", "^.*products.*$"]
19
- data["includeOnlySelector"] : [".content", "#main-article", "article p"]
20
- data["excludeTag"] (str): default to ["script", "noscript", "style", "head", "header","nav","footer", "iframe"]
21
- data["excludeClass"] (str): ["class1", "class2"]
22
- data["excludeId"] (str): ["id1", "id2"]
17
+ data["filterUrls"] list[str]: list of regex pattern to filter urls ["https://www.example.com/en/products", "^.*products.*$"]
18
+ data["includeOnlySelector"] : list[str] [".content", "#main-article", "article p"]
19
+ data["excludeTag"] (list[str]): default to ["script", "noscript", "style", "head", "header","nav","footer", "iframe"]
20
+ data["excludeClass"] (list[str]): ["class1", "class2"]
21
+ data["excludeId"] (list[str]): ["id1", "id2"]
23
22
  """
24
- def __init__(self, knowledgebase_path: str, data: dict[str, str]):
23
+ def __init__(self, knowledgebase_path: str, data: dict[str, Any]):
25
24
  super().__init__(knowledgebase_path, data)
26
25
  self.__sitemap_url = self.data.get("sitemapUrl")
27
26
  self.__filter_urls: list[str] = self.data.get("filterUrls",[]) # type: ignore
@@ -30,12 +29,12 @@ class Sitemap(IntegrationStrategy):
30
29
  self.__exclude_tag: list[str] = self.data.get("excludeTag",[]) # type: ignore
31
30
  self.__exclude_class: list[str] = self.data.get("excludeClass",[]) # type: ignore
32
31
  self.__exclude_id: list[str] = self.data.get("excludeId",[]) # type: ignore
33
- def working_subdirectory(self) -> str: # type: ignore
32
+ def working_subdirectory(self) -> str:
34
33
  return ""
35
34
  def _extract(self, tag: Tag) -> str:
36
35
  return tag.get_text() if self.__output_format == "text" else tag.prettify()
37
36
  def _output(self, documents: list[Document]) -> list[Document]:
38
- return list(markdownify().transform_documents(documents)) if (self.__output_format == "markdown") else documents
37
+ return list(markdownify().transform_documents(documents)) if self.__output_format == "markdown" else documents
39
38
  def _parse(self,content: BeautifulSoup) -> str:
40
39
  if self.__include_only_selectors:
41
40
  extracted = []
@@ -55,21 +54,25 @@ class Sitemap(IntegrationStrategy):
55
54
  for _ in content.select(element):
56
55
  _.decompose()
57
56
  return str(self._extract(content))
58
- async def load(self) -> list[Document]:
59
- def _is_local(url: str) -> bool:
60
- return not url.startswith("http")
61
- def _remap_if_local(url: str) -> str:
62
- return f"{self.knowledgebase_path}/{url}" if _is_local(url) else url
57
+ def _is_local(self, url: str) -> bool:
58
+ return not url.startswith("http")
63
59
 
60
+ def _remap_if_local(self, url: str) -> str:
61
+ return f"{self.knowledgebase_path}/{url}" if self._is_local(url) else url
62
+
63
+ async def load(self) -> list[Document]:
64
64
  if (self.__sitemap_url):
65
65
  _loader = SitemapLoader(
66
- web_path=_remap_if_local(self.__sitemap_url),
66
+ web_path=self._remap_if_local(self.__sitemap_url),
67
67
  filter_urls=self.__filter_urls,
68
68
  parsing_function=self._parse,
69
- is_local=_is_local(self.__sitemap_url)
69
+ is_local=self._is_local(self.__sitemap_url)
70
70
  )
71
71
  _docs = self._output([document async for document in _loader.alazy_load()])
72
- if _is_local(self.__sitemap_url):
73
- await aiofiles.os.remove(_loader.web_path)
72
+ if self._is_local(self.__sitemap_url):
73
+ try:
74
+ await aiofiles.os.remove(_loader.web_path)
75
+ except FileNotFoundError:
76
+ pass
74
77
  return _docs
75
78
  return []
@@ -109,4 +109,5 @@ class Loader():
109
109
  #@timer
110
110
  async def load(self) -> list[Document]:
111
111
  loaders = MergedDataLoader(self.__directory_loader())
112
- return await asyncio.to_thread(loaders.load)
112
+ return await loaders.aload()
113
+ #return await asyncio.to_thread(loaders.load)
@@ -2,7 +2,6 @@ import json
2
2
  from typing import Optional
3
3
  from langchain_core.documents import Document
4
4
  from langchain_community.document_loaders.base import BaseLoader
5
- import aiofiles
6
5
 
7
6
  class JsonLoader(BaseLoader):
8
7
  def __init__(self, file_path: str, meta_fields:Optional[list[str]] = [],encoding: Optional[str] = "utf-8"):
@@ -10,9 +9,9 @@ class JsonLoader(BaseLoader):
10
9
  self.meta_fields = meta_fields
11
10
  self.encoding = encoding
12
11
 
13
- async def load(self) -> list[Document]:
14
- async with aiofiles.open(self.file_path, "r", encoding=self.encoding) as file:
15
- data = json.loads(await file.read())
12
+ def load(self) -> list[Document]:
13
+ with open(self.file_path, "r", encoding=self.encoding) as file:
14
+ data = json.load(file)
16
15
  _list = data if isinstance(data, list) else [data]
17
16
  return [
18
17
  Document(
@@ -1,5 +1,6 @@
1
1
  import datetime
2
2
  import platform
3
+ from fastapi.responses import FileResponse
3
4
  import uvicorn, os, sys
4
5
  from fastapi import FastAPI, Depends
5
6
  from fastapi.openapi.docs import get_swagger_ui_html
@@ -22,7 +23,10 @@ app.include_router(cron,dependencies=[Depends(authenticate)])
22
23
 
23
24
  @app.get("/")
24
25
  async def root():
25
- return {}
26
+ return health()
27
+ @app.get("/favicon.ico")
28
+ async def favicon():
29
+ return FileResponse("./favicon.ico")
26
30
 
27
31
  @app.get("/docs", include_in_schema=False)
28
32
  async def get_swagger_documentation(authenticate: bool = Depends(authenticate)):
@@ -31,7 +35,11 @@ async def get_swagger_documentation(authenticate: bool = Depends(authenticate)):
31
35
  async def openapi(authenticate: bool = Depends(authenticate)):
32
36
  return get_openapi(title=app.title, version=app.version, routes=app.routes)
33
37
 
34
- @app.get("/diag",tags=["diag"])
38
+ @app.get("/api/health",tags=["diag"])
39
+ def health():
40
+ return {"status": "ok"}
41
+
42
+ @app.get("/api/diag",tags=["diag"])
35
43
  def diag(authenticate: bool = Depends(authenticate)):
36
44
  import pkg_resources
37
45
  from ws_bom_robot_app.llm.vector_store.loader.base import Loader as wsll
@@ -48,6 +56,7 @@ def diag(authenticate: bool = Depends(authenticate)):
48
56
  "version": platform.version(),
49
57
  "type": platform.machine(),
50
58
  "processor": platform.processor(),
59
+ "cpu": os.cpu_count(),
51
60
  "architecture": platform.architecture()
52
61
  },
53
62
  "sys": {
@@ -57,6 +66,7 @@ def diag(authenticate: bool = Depends(authenticate)):
57
66
  "args": {k: arg for k, arg in enumerate(sys.argv)}
58
67
  },
59
68
  "os": {
69
+ "ppid": os.getppid(),
60
70
  "pid": os.getpid(),
61
71
  "cwd": os.getcwd(),
62
72
  "ws_bom_robot_app": pkg_resources.get_distribution("ws_bom_robot_app").version,
@@ -64,6 +74,7 @@ def diag(authenticate: bool = Depends(authenticate)):
64
74
  },
65
75
  },
66
76
  "config":config,
77
+ "runtime":config.runtime_options(),
67
78
  "extension": {
68
79
  "loader": ({item[0]: item[1].loader.__name__ if item[1] else None} for item in sorted(wsll._list.items(), key=lambda x: x[0]) if item[1]),
69
80
  "integration":({item[0]: type(item[1]).__name__} for item in wsim._list.items()),
@@ -1,6 +1,6 @@
1
1
  #app
2
2
  standardwebhooks==1.0.0
3
- schedule==1.2.2
3
+ apscheduler==3.11.0
4
4
  aiofiles==24.1.0
5
5
  pydantic==2.9.2
6
6
  pydantic-settings==2.6.0