ws-bom-robot-app 0.0.63__py3-none-any.whl → 0.0.103__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. ws_bom_robot_app/config.py +30 -8
  2. ws_bom_robot_app/cron_manager.py +13 -12
  3. ws_bom_robot_app/llm/agent_context.py +1 -1
  4. ws_bom_robot_app/llm/agent_handler.py +11 -12
  5. ws_bom_robot_app/llm/agent_lcel.py +80 -18
  6. ws_bom_robot_app/llm/api.py +69 -7
  7. ws_bom_robot_app/llm/evaluator.py +319 -0
  8. ws_bom_robot_app/llm/main.py +51 -28
  9. ws_bom_robot_app/llm/models/api.py +40 -6
  10. ws_bom_robot_app/llm/nebuly_handler.py +18 -15
  11. ws_bom_robot_app/llm/providers/llm_manager.py +233 -75
  12. ws_bom_robot_app/llm/tools/tool_builder.py +4 -1
  13. ws_bom_robot_app/llm/tools/tool_manager.py +48 -22
  14. ws_bom_robot_app/llm/utils/chunker.py +6 -1
  15. ws_bom_robot_app/llm/utils/cleanup.py +81 -0
  16. ws_bom_robot_app/llm/utils/cms.py +60 -14
  17. ws_bom_robot_app/llm/utils/download.py +112 -8
  18. ws_bom_robot_app/llm/vector_store/db/base.py +50 -0
  19. ws_bom_robot_app/llm/vector_store/db/chroma.py +28 -8
  20. ws_bom_robot_app/llm/vector_store/db/faiss.py +35 -8
  21. ws_bom_robot_app/llm/vector_store/db/qdrant.py +29 -14
  22. ws_bom_robot_app/llm/vector_store/integration/api.py +216 -0
  23. ws_bom_robot_app/llm/vector_store/integration/azure.py +1 -1
  24. ws_bom_robot_app/llm/vector_store/integration/base.py +58 -15
  25. ws_bom_robot_app/llm/vector_store/integration/confluence.py +33 -5
  26. ws_bom_robot_app/llm/vector_store/integration/dropbox.py +1 -1
  27. ws_bom_robot_app/llm/vector_store/integration/gcs.py +1 -1
  28. ws_bom_robot_app/llm/vector_store/integration/github.py +22 -22
  29. ws_bom_robot_app/llm/vector_store/integration/googledrive.py +46 -17
  30. ws_bom_robot_app/llm/vector_store/integration/jira.py +93 -60
  31. ws_bom_robot_app/llm/vector_store/integration/manager.py +6 -2
  32. ws_bom_robot_app/llm/vector_store/integration/s3.py +1 -1
  33. ws_bom_robot_app/llm/vector_store/integration/sftp.py +1 -1
  34. ws_bom_robot_app/llm/vector_store/integration/sharepoint.py +7 -14
  35. ws_bom_robot_app/llm/vector_store/integration/shopify.py +143 -0
  36. ws_bom_robot_app/llm/vector_store/integration/sitemap.py +6 -1
  37. ws_bom_robot_app/llm/vector_store/integration/slack.py +3 -2
  38. ws_bom_robot_app/llm/vector_store/integration/thron.py +236 -0
  39. ws_bom_robot_app/llm/vector_store/loader/base.py +52 -8
  40. ws_bom_robot_app/llm/vector_store/loader/docling.py +71 -33
  41. ws_bom_robot_app/main.py +148 -146
  42. ws_bom_robot_app/subprocess_runner.py +106 -0
  43. ws_bom_robot_app/task_manager.py +204 -53
  44. ws_bom_robot_app/util.py +6 -0
  45. {ws_bom_robot_app-0.0.63.dist-info → ws_bom_robot_app-0.0.103.dist-info}/METADATA +158 -75
  46. ws_bom_robot_app-0.0.103.dist-info/RECORD +76 -0
  47. ws_bom_robot_app/llm/settings.py +0 -4
  48. ws_bom_robot_app/llm/utils/kb.py +0 -34
  49. ws_bom_robot_app-0.0.63.dist-info/RECORD +0 -72
  50. {ws_bom_robot_app-0.0.63.dist-info → ws_bom_robot_app-0.0.103.dist-info}/WHEEL +0 -0
  51. {ws_bom_robot_app-0.0.63.dist-info → ws_bom_robot_app-0.0.103.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,7 @@
1
1
  from collections import deque
2
2
  import inspect
3
3
  from math import floor
4
- import asyncio, os
4
+ import asyncio, os, traceback
5
5
  from datetime import datetime, timedelta
6
6
  from enum import Enum
7
7
  from typing import Annotated, Coroutine, Literal, TypeVar, Optional, Dict, Union, Any, Callable
@@ -15,6 +15,8 @@ from ws_bom_robot_app.util import _log
15
15
  from sqlalchemy import create_engine, Column, String, JSON, DateTime, Enum
16
16
  from sqlalchemy.orm import sessionmaker, registry
17
17
  from abc import ABC, abstractmethod
18
+ from ws_bom_robot_app.subprocess_runner import _start_subprocess_for_coroutine, _recv_from_connection_async, _pickler
19
+ from ws_bom_robot_app.config import config
18
20
 
19
21
  T = TypeVar('T')
20
22
 
@@ -58,7 +60,8 @@ class TaskMetaData(BaseModel):
58
60
  )
59
61
  source: Optional[str] = None
60
62
  pid: Optional[int] = None
61
- extra: Optional[dict[str,str]] = None
63
+ pid_child: Optional[int] = None
64
+ extra: Optional[dict[str,Union[str,int,bool]]] = None
62
65
 
63
66
  class TaskStatus(IdentifiableEntity):
64
67
  type: Optional[str] = None
@@ -66,19 +69,21 @@ class TaskStatus(IdentifiableEntity):
66
69
  result: Optional[T] = None
67
70
  metadata: TaskMetaData = None
68
71
  error: Optional[str] = None
72
+ retry: int = 0
69
73
  model_config = ConfigDict(
70
74
  arbitrary_types_allowed=True
71
75
  )
72
76
 
73
77
  class TaskEntry(IdentifiableEntity):
74
- task: Annotated[asyncio.Task, Field(default=None, validate_default=False)] = None
75
- coroutine: Coroutine = None
78
+ task: Annotated[Union[asyncio.Task, Callable], Field(default=None, validate_default=False)] = None
79
+ coroutine: Any = None
76
80
  headers: TaskHeader | None = None
77
81
  status: Union[TaskStatus, None] = None
78
- def _get_coroutine_name(self, coroutine: asyncio.coroutines) -> str:
79
- if inspect.iscoroutine(coroutine):
80
- return coroutine.cr_code.co_name
81
- return "<unknown>"
82
+ queue: Literal["slow", "fast"] | None = "slow"
83
+ def _get_coroutine_name(self, func: Any) -> str:
84
+ if inspect.iscoroutine(func):
85
+ return func.cr_code.co_name
86
+ return func.__qualname__ if callable(func) else str(func)
82
87
  def __init__(self, **data):
83
88
  def _metadata_extra(data: str) -> dict[str,str] | None:
84
89
  if data:
@@ -115,7 +120,8 @@ class TaskEntry(IdentifiableEntity):
115
120
  class TaskStatistics(BaseModel):
116
121
  class TaskStatisticExecutionInfo(BaseModel):
117
122
  retention_days: float = config.robot_task_retention_days
118
- max_concurrent: int
123
+ max_parallelism: int
124
+ slot_available: dict[str,int]
119
125
  pid: int = os.getpid()
120
126
  running: list[TaskStatus]
121
127
  slowest: list
@@ -134,14 +140,29 @@ class TaskStatistics(BaseModel):
134
140
 
135
141
  #region interface
136
142
  class TaskManagerStrategy(ABC):
137
- def __init__(self, max_concurrent_tasks: int = max(1,floor(config.robot_task_max_total_parallelism / config.runtime_options().number_of_workers))):
138
- self.max_concurrent_tasks = max_concurrent_tasks
139
- self.semaphore = asyncio.Semaphore(self.max_concurrent_tasks)
143
+ def __init__(self, max_concurrent_tasks: Optional[int] = None):
144
+ if max_concurrent_tasks is None:
145
+ workers = config.runtime_options().number_of_workers
146
+ max_concurrent_tasks = max(1, floor(config.robot_task_max_total_parallelism / max(1, workers)))
147
+ self.max_parallelism = max_concurrent_tasks
148
+ self.semaphore = {"slow": asyncio.Semaphore(max_concurrent_tasks), "fast": asyncio.Semaphore(max_concurrent_tasks*2)}
140
149
  self.running_tasks = dict[str, TaskEntry]()
141
150
  self.loop = asyncio.get_event_loop()
142
151
 
143
152
  @abstractmethod
144
- def create_task(self, coroutine, headers: TaskHeader | None = None) -> IdentifiableEntity:
153
+ def create_task(self, coroutine, headers: TaskHeader | None = None, queue: Literal["slow", "fast"] | None = "slow") -> IdentifiableEntity:
154
+ """Create a new task.
155
+ Args:
156
+ coroutine (_type_): coroutine or callable to be executed.
157
+ headers (TaskHeader | None, optional): for api call, http headers to include with the task. Defaults to None.
158
+ Returns:
159
+ IdentifiableEntity: The created task id.
160
+ Usage:
161
+ from ws_bom_robot_app.task_manager import task_manager
162
+ task_manager.create_task(my_coroutine, headers=my_headers) -> coroutine executed in-process
163
+ task_manager.create_task(lambda: my_coroutine, headers=my_headers) -> callable using subprocess
164
+ task_manager.create_task(lambda: my_coroutine, headers=my_headers, queue="fast") -> callable using subprocess with "fast" queue
165
+ """
145
166
  pass
146
167
 
147
168
  @abstractmethod
@@ -172,49 +193,173 @@ class TaskManagerStrategy(ABC):
172
193
  def task_cleanup_rule(self, task: TaskEntry) -> bool:
173
194
  return task.status.metadata.start_at and datetime.fromisoformat(task.status.metadata.start_at) < datetime.now() - timedelta(days=config.robot_task_retention_days)
174
195
 
175
- def task_done_callback(self, task_entry: TaskEntry, headers: TaskHeader | None = None) -> Callable:
176
- def callback(task: asyncio.Task):
196
+ def _update_task_by_event(self, task_entry: TaskEntry, status: str, output: Any) -> None:
197
+ if status == "completed":
198
+ task_entry.status.status = "completed"
199
+ task_entry.status.result = output
200
+ elif status == "failure":
201
+ task_entry.status.status = "failure"
202
+ task_entry.status.error = str(output)
203
+ _log.error(f"Task {task_entry.id} failed with error: {output}")
204
+ else:
205
+ task_entry.status.metadata.end_at = str(datetime.now().isoformat())
206
+ #strategy-specific behavior
207
+ self.update_task_status(task_entry)
208
+ #remove from running tasks
209
+ if task_entry.id in self.running_tasks:
210
+ del self.running_tasks[task_entry.id]
211
+ #notify webhooks: a task has completed or failed, if failed with retry policy the task remains in pending state, and will not be notified until complete/failure
212
+ if task_entry.status.status in ["completed","failure"]:
213
+ if task_entry.headers and task_entry.headers.x_ws_bom_webhooks:
214
+ try:
215
+ asyncio.create_task(
216
+ WebhookNotifier().notify_webhook(task_entry.status, task_entry.headers.x_ws_bom_webhooks)
217
+ )
218
+ except Exception as e:
219
+ _log.error(f"Failed to schedule webhook notification for task {task_entry.id}: {e}")
220
+
221
+ def task_done_callback(self, task_entry: TaskEntry) -> Callable:
222
+ def callback(task: asyncio.Task, context: Any | None = None):
177
223
  try:
178
- result = task_entry.task.result()
179
- task_entry.status.status = "completed"
180
- task_entry.status.result = result
224
+ result = task.result()
225
+ self._update_task_by_event(task_entry, "completed", result)
181
226
  except Exception as e:
182
- task_entry.status.status = "failure"
183
- task_entry.status.error = str(e)
227
+ self._update_task_by_event(task_entry, "failure", e)
184
228
  finally:
185
- task_entry.status.metadata.end_at = str(datetime.now().isoformat())
186
- #strategy-specific behavior
187
- self.update_task_status(task_entry)
188
- #remove from running tasks
189
- del self.running_tasks[task_entry.id]
190
- #notify webhooks
191
- if headers and headers.x_ws_bom_webhooks:
192
- asyncio.create_task(
193
- WebhookNotifier().notify_webhook(task_entry.status,headers.x_ws_bom_webhooks)
194
- )
229
+ self._update_task_by_event(task_entry, "callback", None)
195
230
  return callback
196
231
 
197
- def create_task_entry(self, coroutine: asyncio.coroutines, headers: TaskHeader | None = None) -> TaskEntry:
232
+ def create_task_entry(self, coroutine_or_callable: Any, headers: TaskHeader | None = None, queue: Literal["slow", "fast"] | None = "slow") -> TaskEntry:
233
+ """Create a new task entry.
234
+
235
+ Args:
236
+ coroutine_or_callable (Any): The coroutine or callable to be executed.
237
+ headers (TaskHeader | None, optional): Headers to include with the task. Defaults to None.
238
+ Raises:
239
+ TypeError: If the input is not a coroutine or callable.
240
+ Returns:
241
+ TaskEntry: The created task entry.
242
+ """
198
243
  _id = headers and headers.x_ws_bom_msg_id or str(uuid4())
199
- task = TaskEntry(
244
+ # Detect coroutine object
245
+ if inspect.iscoroutine(coroutine_or_callable):
246
+ can_use_subprocess = False
247
+ elif callable(coroutine_or_callable):
248
+ can_use_subprocess = True
249
+ else:
250
+ raise TypeError(
251
+ f"Expected coroutine object or callable, got {type(coroutine_or_callable)}"
252
+ )
253
+ task_entry = TaskEntry(
200
254
  id=_id,
201
- coroutine=coroutine,
202
- headers=headers)
203
- self.loop.create_task(self._run_task_with_semaphore(task)) # run the task
204
- return task
255
+ coroutine=coroutine_or_callable,
256
+ headers=headers,
257
+ queue=queue
258
+ )
259
+ # Store hint for subprocess capability
260
+ task_entry.status.metadata.extra = task_entry.status.metadata.extra or {}
261
+ task_entry.status.metadata.extra["can_use_subprocess"] = can_use_subprocess
262
+ try:
263
+ asyncio.create_task(self._run_task_with_semaphore(task_entry)) # run the task
264
+ except Exception as e:
265
+ _log.error(f"Error occurred while creating task {task_entry.id}: {e}")
266
+ return task_entry
205
267
 
206
268
  async def _run_task_with_semaphore(self, task_entry: TaskEntry):
207
269
  """Run a task with semaphore control to limit concurrency."""
208
- async with self.semaphore:
270
+ async with self.semaphore[task_entry.queue]:
209
271
  await self._execute_task(task_entry)
210
272
 
273
+ async def _monitor_subprocess(self, task_entry: TaskEntry, proc, conn):
274
+ try:
275
+ # Wait for the worker to send bytes (this blocks, so run via executor wrapper)
276
+ data_bytes = await _recv_from_connection_async(conn)
277
+ # unpickle bytes to get payload
278
+ try:
279
+ payload = _pickler.loads(data_bytes)
280
+ except Exception:
281
+ # fallback if pickler fails
282
+ payload = ("err", {"error": "Failed to unpickle subprocess result"})
283
+ if isinstance(payload, tuple) and payload[0] == "ok":
284
+ result = payload[1]
285
+ # write results into task_entry
286
+ self._update_task_by_event(task_entry, "completed", result)
287
+ else:
288
+ # error
289
+ err_info = payload[1]["error"] if isinstance(payload, tuple) else str(payload)
290
+ self._update_task_by_event(task_entry, "failure", err_info) # give up, no retry
291
+ except Exception:
292
+ # maybe subprocess is no more alive / killed due to memory pressure
293
+ if task_entry.status.retry < config.robot_task_mp_max_retries:
294
+ task_entry.status.retry += 1
295
+ _log.warning(f"Task {task_entry.id} failure, retrying {task_entry.status.retry}...")
296
+ async def delayed_retry():
297
+ _delay = config.robot_task_mp_retry_delay # help to backpressure when overloaded
298
+ if self.semaphore[task_entry.queue]._value > 0: # free semaphore slots available
299
+ _delay = 5 # small/no delay if retry can run immediately
300
+ await asyncio.sleep(_delay) # delay in seconds
301
+ await self._run_task_with_semaphore(task_entry)
302
+ asyncio.create_task(delayed_retry())
303
+ # semaphore is released, so new task can be executed
304
+ return
305
+ else:
306
+ self._update_task_by_event(task_entry, "failure", "subprocess monitor error: failed to receive data from connection")
307
+ finally:
308
+ # ensure process termination / cleanup
309
+ try:
310
+ conn.close()
311
+ except Exception:
312
+ pass
313
+ try:
314
+ if proc.is_alive():
315
+ proc.terminate()
316
+ proc.join(timeout=1)
317
+ except Exception:
318
+ pass
319
+ # callback
320
+ self._update_task_by_event(task_entry, "callback", None)
321
+
211
322
  async def _execute_task(self, task_entry: TaskEntry):
212
- """Execute a task and handle its lifecycle."""
323
+ """
324
+ Execute the task. Try to run it inside a subprocess (if serializable).
325
+ If subprocess is used, we create a monitor asyncio.Task that waits for the subprocess result
326
+ and then calls the same task_done_callback to finalize and persist state.
327
+ If subprocess cannot be used, fall back to in-process behavior.
328
+ """
213
329
  self.running_tasks[task_entry.id]=task_entry
214
330
  task_entry.status.metadata.start_at = str(datetime.now().isoformat())
215
- task_entry.task = asyncio.create_task(task_entry.coroutine)
216
- task_entry.task.add_done_callback(self.task_done_callback(task_entry, task_entry.headers))
217
- await task_entry.task
331
+ # try to spawn subprocess (non-blocking)
332
+ can_use_subprocess = task_entry.status.metadata.extra.get("can_use_subprocess", False)
333
+ if config.robot_task_mp_enable and can_use_subprocess:
334
+ proc, conn, used_subprocess = _start_subprocess_for_coroutine(task_entry.coroutine)
335
+ if used_subprocess and proc is not None and conn is not None:
336
+ # monitor subprocess asynchronously
337
+ task_entry.status.status = "pending"
338
+ task_entry.status.metadata.pid_child = proc.pid
339
+ _log.info(f"Task {task_entry.id} started in subprocess (pid={proc.pid})")
340
+ # await monitor process, then return: important to acquire semaphore
341
+ await self._monitor_subprocess(task_entry, proc, conn)
342
+ return
343
+ # default fallback (in-process)
344
+ try:
345
+ async def _callable_to_coroutine(func: Any) -> Any:
346
+ if callable(func) and not inspect.iscoroutine(func):
347
+ result = func()
348
+ if inspect.iscoroutine(result):
349
+ return await result
350
+ return result
351
+ elif inspect.iscoroutine(func):
352
+ return await func
353
+ return func
354
+ task_entry.task = asyncio.create_task(_callable_to_coroutine(task_entry.coroutine))
355
+ task_entry.task.add_done_callback(self.task_done_callback(task_entry))
356
+ _log.info(f"Starting task {task_entry.id} in-process with coroutine {task_entry._get_coroutine_name(task_entry.coroutine)}")
357
+ await task_entry.task
358
+ except Exception as e:
359
+ _error = f"Error occurred while executing task {task_entry.id}: {e}"
360
+ _log.error(_error)
361
+ self._update_task_by_event(task_entry, "failure", _error)
362
+ self._update_task_by_event(task_entry, "callback", None)
218
363
 
219
364
  def running_task(self):
220
365
  return self.running_tasks.values()
@@ -250,7 +395,8 @@ class TaskManagerStrategy(ABC):
250
395
  ),
251
396
  exec_info=TaskStatistics.TaskStatisticExecutionInfo(
252
397
  retention_days=config.robot_task_retention_days,
253
- max_concurrent=self.max_concurrent_tasks,
398
+ max_parallelism=self.max_parallelism,
399
+ slot_available={queue: self.semaphore[queue]._value for queue in self.semaphore},
254
400
  running=[task.status for task in self.running_task()],
255
401
  slowest=_slowest
256
402
  )
@@ -258,14 +404,14 @@ class TaskManagerStrategy(ABC):
258
404
 
259
405
  #endregion
260
406
 
261
- #memory implementation
407
+ #region memory implementation
262
408
  class MemoryTaskManagerStrategy(TaskManagerStrategy):
263
- def __init__(self):
264
- super().__init__()
409
+ def __init__(self, max_concurrent_tasks: Optional[int] = None):
410
+ super().__init__(max_concurrent_tasks)
265
411
  self.tasks: Dict[str, TaskEntry] = {}
266
412
 
267
- def create_task(self, coroutine: asyncio.coroutines, headers: TaskHeader | None = None) -> IdentifiableEntity:
268
- task = self.create_task_entry(coroutine, headers)
413
+ def create_task(self, coroutine: Any, headers: TaskHeader | None = None, queue: Literal["slow", "fast"] | None = "slow") -> IdentifiableEntity:
414
+ task = self.create_task_entry(coroutine, headers, queue)
269
415
  self.tasks[task.id] = task
270
416
  return IdentifiableEntity(id=task.id)
271
417
 
@@ -292,7 +438,7 @@ class MemoryTaskManagerStrategy(TaskManagerStrategy):
292
438
 
293
439
  #endregion
294
440
 
295
- #db implementation
441
+ #region db implementation
296
442
  Base = registry().generate_base()
297
443
  class TaskEntryModel(Base):
298
444
  __tablename__ = "entry"
@@ -302,8 +448,8 @@ class TaskEntryModel(Base):
302
448
  arbitrary_types_allowed=True
303
449
  )
304
450
  class DatabaseTaskManagerStrategy(TaskManagerStrategy):
305
- def __init__(self, db_url: str = "sqlite:///.data/db/tasks.sqlite"):
306
- super().__init__()
451
+ def __init__(self, db_url: str = f"sqlite:///{config.robot_data_folder}/db/tasks.sqlite", max_concurrent_tasks: Optional[int] = None):
452
+ super().__init__(max_concurrent_tasks)
307
453
  self.engine = create_engine(db_url)
308
454
  self.Session = sessionmaker(bind=self.engine)
309
455
  Base.metadata.create_all(self.engine)
@@ -353,11 +499,16 @@ class DatabaseTaskManagerStrategy(TaskManagerStrategy):
353
499
  #region global
354
500
  def __get_taskmanager_strategy() -> TaskManagerStrategy:
355
501
  """ Factory function to get the appropriate task manager strategy based on the runtime configuration."""
356
- if config.runtime_options().is_multi_process:
357
- return DatabaseTaskManagerStrategy()
358
- return MemoryTaskManagerStrategy()
502
+ if config.robot_task_strategy == 'memory':
503
+ return MemoryTaskManagerStrategy()
504
+ return DatabaseTaskManagerStrategy()
359
505
  task_manager = __get_taskmanager_strategy()
360
506
  _log.info(f"Task manager strategy: {task_manager.__class__.__name__}")
507
+
508
+ def task_cleanup():
509
+ _log.info("Cleaning up tasks...")
510
+ task_manager.cleanup_task()
511
+ _log.info("Task cleanup complete.")
361
512
  #endregion
362
513
 
363
514
  #region api
ws_bom_robot_app/util.py CHANGED
@@ -22,6 +22,12 @@ def logger_instance(name: str) -> logging.Logger:
22
22
  _log: logging.Logger = locals().get("_loc", logger_instance(__name__))
23
23
  #endregion
24
24
 
25
+ #region task
26
+ def is_app_subprocess():
27
+ """Check if we're running a task in a subprocess."""
28
+ return os.environ.get('IS_ROBOT_APP_SUBPROCESS', '').lower() == 'true'
29
+ #endregion
30
+
25
31
  #region cache
26
32
  _cache = {}
27
33
  _cache_timestamps = {}