ws-bom-robot-app 0.0.63__py3-none-any.whl → 0.0.103__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. ws_bom_robot_app/config.py +30 -8
  2. ws_bom_robot_app/cron_manager.py +13 -12
  3. ws_bom_robot_app/llm/agent_context.py +1 -1
  4. ws_bom_robot_app/llm/agent_handler.py +11 -12
  5. ws_bom_robot_app/llm/agent_lcel.py +80 -18
  6. ws_bom_robot_app/llm/api.py +69 -7
  7. ws_bom_robot_app/llm/evaluator.py +319 -0
  8. ws_bom_robot_app/llm/main.py +51 -28
  9. ws_bom_robot_app/llm/models/api.py +40 -6
  10. ws_bom_robot_app/llm/nebuly_handler.py +18 -15
  11. ws_bom_robot_app/llm/providers/llm_manager.py +233 -75
  12. ws_bom_robot_app/llm/tools/tool_builder.py +4 -1
  13. ws_bom_robot_app/llm/tools/tool_manager.py +48 -22
  14. ws_bom_robot_app/llm/utils/chunker.py +6 -1
  15. ws_bom_robot_app/llm/utils/cleanup.py +81 -0
  16. ws_bom_robot_app/llm/utils/cms.py +60 -14
  17. ws_bom_robot_app/llm/utils/download.py +112 -8
  18. ws_bom_robot_app/llm/vector_store/db/base.py +50 -0
  19. ws_bom_robot_app/llm/vector_store/db/chroma.py +28 -8
  20. ws_bom_robot_app/llm/vector_store/db/faiss.py +35 -8
  21. ws_bom_robot_app/llm/vector_store/db/qdrant.py +29 -14
  22. ws_bom_robot_app/llm/vector_store/integration/api.py +216 -0
  23. ws_bom_robot_app/llm/vector_store/integration/azure.py +1 -1
  24. ws_bom_robot_app/llm/vector_store/integration/base.py +58 -15
  25. ws_bom_robot_app/llm/vector_store/integration/confluence.py +33 -5
  26. ws_bom_robot_app/llm/vector_store/integration/dropbox.py +1 -1
  27. ws_bom_robot_app/llm/vector_store/integration/gcs.py +1 -1
  28. ws_bom_robot_app/llm/vector_store/integration/github.py +22 -22
  29. ws_bom_robot_app/llm/vector_store/integration/googledrive.py +46 -17
  30. ws_bom_robot_app/llm/vector_store/integration/jira.py +93 -60
  31. ws_bom_robot_app/llm/vector_store/integration/manager.py +6 -2
  32. ws_bom_robot_app/llm/vector_store/integration/s3.py +1 -1
  33. ws_bom_robot_app/llm/vector_store/integration/sftp.py +1 -1
  34. ws_bom_robot_app/llm/vector_store/integration/sharepoint.py +7 -14
  35. ws_bom_robot_app/llm/vector_store/integration/shopify.py +143 -0
  36. ws_bom_robot_app/llm/vector_store/integration/sitemap.py +6 -1
  37. ws_bom_robot_app/llm/vector_store/integration/slack.py +3 -2
  38. ws_bom_robot_app/llm/vector_store/integration/thron.py +236 -0
  39. ws_bom_robot_app/llm/vector_store/loader/base.py +52 -8
  40. ws_bom_robot_app/llm/vector_store/loader/docling.py +71 -33
  41. ws_bom_robot_app/main.py +148 -146
  42. ws_bom_robot_app/subprocess_runner.py +106 -0
  43. ws_bom_robot_app/task_manager.py +204 -53
  44. ws_bom_robot_app/util.py +6 -0
  45. {ws_bom_robot_app-0.0.63.dist-info → ws_bom_robot_app-0.0.103.dist-info}/METADATA +158 -75
  46. ws_bom_robot_app-0.0.103.dist-info/RECORD +76 -0
  47. ws_bom_robot_app/llm/settings.py +0 -4
  48. ws_bom_robot_app/llm/utils/kb.py +0 -34
  49. ws_bom_robot_app-0.0.63.dist-info/RECORD +0 -72
  50. {ws_bom_robot_app-0.0.63.dist-info → ws_bom_robot_app-0.0.103.dist-info}/WHEEL +0 -0
  51. {ws_bom_robot_app-0.0.63.dist-info → ws_bom_robot_app-0.0.103.dist-info}/top_level.txt +0 -0
ws_bom_robot_app/main.py CHANGED
@@ -1,154 +1,156 @@
1
1
  import datetime
2
- import platform
3
- from fastapi.responses import FileResponse
4
- import uvicorn, os, sys
5
- from fastapi import FastAPI, Depends
6
- from fastapi.openapi.docs import get_swagger_ui_html
7
- from fastapi.openapi.utils import get_openapi
8
- from ws_bom_robot_app.auth import authenticate
9
- from ws_bom_robot_app.config import config
10
- from ws_bom_robot_app.llm.api import router as llm
11
- from ws_bom_robot_app.task_manager import router as task
12
- from ws_bom_robot_app.cron_manager import (
13
- router as cron,
14
- cron_manager)
15
- from ws_bom_robot_app.util import _log
16
-
2
+ from fastapi import FastAPI
3
+ from ws_bom_robot_app.util import is_app_subprocess
17
4
  _uptime = datetime.datetime.now()
18
- cron_manager.start()
19
5
  app = FastAPI(redoc_url=None,docs_url=None,openapi_url=None)
20
- app.include_router(llm,dependencies=[Depends(authenticate)])
21
- app.include_router(task,dependencies=[Depends(authenticate)])
22
- app.include_router(cron,dependencies=[Depends(authenticate)])
23
6
 
24
- @app.get("/")
25
- async def root():
26
- return health()
27
- @app.get("/favicon.ico")
28
- async def favicon():
29
- return FileResponse("./favicon.ico")
7
+ if not is_app_subprocess():
8
+ import platform
9
+ from fastapi.responses import FileResponse
10
+ import os, sys
11
+ from fastapi import Depends
12
+ from fastapi.openapi.docs import get_swagger_ui_html, get_redoc_html
13
+ from fastapi.openapi.utils import get_openapi
14
+ from ws_bom_robot_app.auth import authenticate
15
+ from ws_bom_robot_app.config import config
16
+ from ws_bom_robot_app.util import _log
17
+ from ws_bom_robot_app.llm.api import router as llm
18
+ from ws_bom_robot_app.task_manager import router as task
19
+ from ws_bom_robot_app.cron_manager import (
20
+ router as cron,
21
+ cron_manager)
22
+ cron_manager.start()
23
+ app.include_router(llm,dependencies=[Depends(authenticate)])
24
+ app.include_router(task,dependencies=[Depends(authenticate)])
25
+ app.include_router(cron,dependencies=[Depends(authenticate)])
30
26
 
31
- @app.get("/docs", include_in_schema=False)
32
- async def get_swagger_documentation(authenticate: bool = Depends(authenticate)):
33
- return get_swagger_ui_html(openapi_url="/openapi.json", title="docs")
34
- @app.get("/openapi.json", include_in_schema=False)
35
- async def openapi(authenticate: bool = Depends(authenticate)):
36
- return get_openapi(title=app.title, version=app.version, routes=app.routes)
27
+ @app.get("/")
28
+ async def root():
29
+ return health()
30
+ @app.get("/favicon.ico")
31
+ async def favicon():
32
+ return FileResponse("./favicon.ico")
37
33
 
38
- @app.get("/api/health",tags=["diag"])
39
- def health():
40
- return {"status": "ok"}
41
- def __get_size(bytes, suffix="B"):
42
- """
43
- Scale bytes to its proper format
44
- e.g:
45
- 1253656 => '1.20MB'
46
- 1253656678 => '1.17GB'
47
- """
48
- factor = 1024
49
- for unit in ["", "K", "M", "G", "T", "P"]:
50
- if bytes < factor:
51
- return f"{bytes:.2f}{unit}{suffix}"
52
- bytes /= factor
53
- def __get_disk_info():
54
- import psutil
55
- partitions = psutil.disk_partitions()
56
- _disks:list = []
57
- for partition in partitions:
58
- device = partition.device
59
- mountpoint = partition.mountpoint
60
- fstype = partition.fstype
61
- try:
62
- usage = psutil.disk_usage(mountpoint)
63
- except PermissionError:
64
- continue
65
- total = __get_size(usage.total)
66
- used = __get_size(usage.used)
67
- free = __get_size(usage.free)
68
- percent = f"{usage.percent}%"
69
- _disks.append({"device": device, "mountpoint": mountpoint, "fstype": fstype, "total": total, "used": used, "free": free, "percent": percent})
70
- return _disks
71
- @app.get("/api/diag",tags=["diag"])
72
- def diag(authenticate: bool = Depends(authenticate)):
73
- import pkg_resources, psutil
74
- from ws_bom_robot_app.llm.providers.llm_manager import LlmManager as wsllm
75
- from ws_bom_robot_app.llm.vector_store.db.manager import VectorDbManager as wsdb
76
- from ws_bom_robot_app.llm.vector_store.loader.base import Loader as wsldr
77
- from ws_bom_robot_app.llm.vector_store.integration.manager import IntegrationManager as wsim
78
- from ws_bom_robot_app.llm.tools.tool_manager import ToolManager as wstm
79
- from ws_bom_robot_app.llm.agent_description import AgentDescriptor as wsad
34
+ @app.get("/docs", include_in_schema=False)
35
+ async def get_swagger_documentation(authenticate: bool = Depends(authenticate)):
36
+ return get_swagger_ui_html(openapi_url="/openapi.json", title="docs")
37
+ @app.get("/redoc", include_in_schema=False)
38
+ async def get_redoc_documentation(authenticate: bool = Depends(authenticate)):
39
+ return get_redoc_html(openapi_url="/openapi.json", title="docs")
40
+ @app.get("/openapi.json", include_in_schema=False)
41
+ async def openapi(authenticate: bool = Depends(authenticate)):
42
+ return get_openapi(title=app.title, version=app.version, routes=app.routes)
80
43
 
81
- svmem = psutil.virtual_memory()
82
- swap = psutil.swap_memory()
83
- try:
84
- ws_bom_robot_app_version = pkg_resources.get_distribution("ws_bom_robot_app").version
85
- except:
86
- ws_bom_robot_app_version = "unknown"
87
- peer_process_ids = [c.pid for c in psutil.Process(os.getppid()).children()] if config.runtime_options().is_multi_process else None
88
- return {
89
- "status":"ok",
90
- "uptime": {'from':_uptime,'elapsed':str(datetime.datetime.now()-_uptime)},
91
- "system": {
92
- "platform": {
93
- "node": platform.node(),
94
- "system": platform.system(),
95
- "platform": platform.platform(),
96
- "version": platform.version(),
97
- "type": platform.machine(),
98
- "processor": platform.processor(),
99
- "architecture": platform.architecture()
100
- },
101
- "cpu": {
102
- "physical_core": psutil.cpu_count(logical=False),
103
- "total_core": psutil.cpu_count(logical=True),
104
- "load": f"{psutil.cpu_percent(interval=1)}%"
105
- },
106
- "memory": {
107
- "total": f"{__get_size(svmem.total)}",
108
- "available": f"{__get_size(svmem.available)}",
109
- "used": f"{__get_size(svmem.used)}",
110
- "free": f"{__get_size(svmem.free)}",
111
- "percent": f"{svmem.percent}%"
112
- },
113
- "swap": {
114
- "total": f"{__get_size(swap.total)}",
115
- "used": f"{__get_size(swap.used)}",
116
- "free": f"{__get_size(swap.free)}",
117
- "percent": f"{swap.percent}%"
118
- },
119
- "disk": __get_disk_info(),
120
- "sys": {
121
- "version": sys.version,
122
- "platform": sys.platform,
123
- "executable": sys.executable,
124
- "args": {k: arg for k, arg in enumerate(sys.argv)}
125
- },
126
- "os": {
127
- "ppid": os.getppid(),
128
- "pid": os.getpid(),
129
- "pids": peer_process_ids,
130
- "cwd": os.getcwd(),
131
- "ws_bom_robot_app": ws_bom_robot_app_version,
132
- "env": os.environ,
133
- },
134
- },
135
- "config":config,
136
- "runtime":config.runtime_options(),
137
- "extension": {
138
- "provider":({item[0]: type(item[1]).__name__} for item in wsllm._list.items()),
139
- "db":({item[0]: type(item[1]).__name__} for item in wsdb._list.items()),
140
- "loader": ({item[0]: item[1].loader.__name__ if item[1] else None} for item in sorted(wsldr._list.items(), key=lambda x: x[0]) if item[1]),
141
- "integration":({item[0]: type(item[1]).__name__} for item in wsim._list.items()),
142
- "tool": ({item[0]: item[1].function.__name__} for item in wstm._list.items()),
143
- "agent":({item[0]: type(item[1]).__name__} for item in wsad._list.items())
144
- }
145
- }
146
- @app.post("/diag/reload",tags=["diag"])
147
- def reset(authenticate: bool = Depends(authenticate)):
148
- _log.info("restart server")
149
- with open(".reloadfile","w") as f:
150
- f.write("")
44
+ @app.get("/api/health",tags=["diag"])
45
+ def health():
46
+ return {"status": "ok"}
47
def __get_size(bytes, suffix="B"):
    """
    Scale a byte count to a human-readable string with two decimals.

    e.g:
        1253656 => '1.20MB'
        1253656678 => '1.17GB'

    Args:
        bytes: Size to format (int or float). The parameter name shadows the
            builtin but is kept so existing callers are unaffected.
        suffix: Text appended after the unit prefix (defaults to "B").

    Returns:
        str: The scaled value followed by the unit prefix and suffix.
    """
    factor = 1024
    # Work on a local copy instead of reassigning the builtin-shadowing parameter.
    value = bytes
    for unit in ("", "K", "M", "G", "T", "P"):
        if value < factor:
            return f"{value:.2f}{unit}{suffix}"
        value /= factor
    # The value has been divided six times, so it is expressed in 1024**6 units.
    # Return it explicitly instead of falling off the loop and yielding None.
    return f"{value:.2f}E{suffix}"
59
def __get_disk_info():
    """
    Describe every partition reported by psutil together with its usage.

    Returns:
        list: One dict per readable partition with the keys "device",
        "mountpoint", "fstype", "total", "used", "free" and "percent".
        Sizes are formatted by __get_size; "percent" is the usage
        percentage followed by "%".
    """
    import psutil

    report: list = []
    for part in psutil.disk_partitions():
        try:
            stats = psutil.disk_usage(part.mountpoint)
        except PermissionError:
            # Usage cannot be read for this mountpoint: leave it out of the report.
            continue
        report.append({
            "device": part.device,
            "mountpoint": part.mountpoint,
            "fstype": part.fstype,
            "total": __get_size(stats.total),
            "used": __get_size(stats.used),
            "free": __get_size(stats.free),
            "percent": f"{stats.percent}%",
        })
    return report
77
+ @app.get("/api/diag",tags=["diag"])
78
+ def diag(authenticate: bool = Depends(authenticate)):
79
+ import importlib,psutil
80
+ from ws_bom_robot_app.llm.providers.llm_manager import LlmManager as wsllm
81
+ from ws_bom_robot_app.llm.vector_store.db.manager import VectorDbManager as wsdb
82
+ from ws_bom_robot_app.llm.vector_store.loader.base import Loader as wsldr
83
+ from ws_bom_robot_app.llm.vector_store.integration.manager import IntegrationManager as wsim
84
+ from ws_bom_robot_app.llm.tools.tool_manager import ToolManager as wstm
85
+ from ws_bom_robot_app.llm.agent_description import AgentDescriptor as wsad
151
86
 
152
- # Start the FastAPI server
153
- if __name__ == "__main__":
154
- uvicorn.run(app, host="0.0.0.0", port=6001, env_file="../.env", reload=True, log_level="debug")
87
+ svmem = psutil.virtual_memory()
88
+ swap = psutil.swap_memory()
89
+ try:
90
+ ws_bom_robot_app_version = importlib.metadata.version("ws_bom_robot_app")
91
+ except:
92
+ ws_bom_robot_app_version = "unknown"
93
+ peer_process_ids = [c.pid for c in psutil.Process(os.getppid()).children()] if config.runtime_options().is_multi_process else None
94
+ return {
95
+ "status":"ok",
96
+ "uptime": {'from':_uptime,'elapsed':str(datetime.datetime.now()-_uptime)},
97
+ "system": {
98
+ "platform": {
99
+ "node": platform.node(),
100
+ "system": platform.system(),
101
+ "platform": platform.platform(),
102
+ "version": platform.version(),
103
+ "type": platform.machine(),
104
+ "processor": platform.processor(),
105
+ "architecture": platform.architecture()
106
+ },
107
+ "cpu": {
108
+ "physical_core": psutil.cpu_count(logical=False),
109
+ "total_core": psutil.cpu_count(logical=True),
110
+ "load": f"{psutil.cpu_percent(interval=1)}%"
111
+ },
112
+ "memory": {
113
+ "total": f"{__get_size(svmem.total)}",
114
+ "available": f"{__get_size(svmem.available)}",
115
+ "used": f"{__get_size(svmem.used)}",
116
+ "free": f"{__get_size(svmem.free)}",
117
+ "percent": f"{svmem.percent}%"
118
+ },
119
+ "swap": {
120
+ "total": f"{__get_size(swap.total)}",
121
+ "used": f"{__get_size(swap.used)}",
122
+ "free": f"{__get_size(swap.free)}",
123
+ "percent": f"{swap.percent}%"
124
+ },
125
+ "disk": __get_disk_info(),
126
+ "sys": {
127
+ "version": sys.version,
128
+ "platform": sys.platform,
129
+ "executable": sys.executable,
130
+ "args": {k: arg for k, arg in enumerate(sys.argv)}
131
+ },
132
+ "os": {
133
+ "ppid": os.getppid(),
134
+ "pid": os.getpid(),
135
+ "pids": peer_process_ids,
136
+ "cwd": os.getcwd(),
137
+ "ws_bom_robot_app": ws_bom_robot_app_version,
138
+ "env": os.environ,
139
+ },
140
+ },
141
+ "config":config,
142
+ "runtime":config.runtime_options(),
143
+ "extension": {
144
+ "provider":({item[0]: type(item[1]).__name__} for item in wsllm._list.items()),
145
+ "db":({item[0]: type(item[1]).__name__} for item in wsdb._list.items()),
146
+ "loader": ({item[0]: item[1].loader.__name__ if item[1] else None} for item in sorted(wsldr._list.items(), key=lambda x: x[0]) if item[1]),
147
+ "integration":({item[0]: type(item[1]).__name__} for item in wsim._list.items()),
148
+ "tool": ({item[0]: item[1].function.__name__} for item in wstm._list.items()),
149
+ "agent":({item[0]: type(item[1]).__name__} for item in wsad._list.items())
150
+ }
151
+ }
152
+ @app.post("/diag/reload",tags=["diag"])
153
+ def reset(authenticate: bool = Depends(authenticate)):
154
+ _log.info("restart server")
155
+ with open(".reloadfile","w") as f:
156
+ f.write("")
ws_bom_robot_app/subprocess_runner.py ADDED
@@ -0,0 +1,106 @@
1
+ import logging
2
+ import multiprocessing as mp
3
+ from multiprocessing.connection import Connection
4
+ import dill as _pickler
5
+ import types, traceback
6
+ import asyncio, sys
7
+ from ws_bom_robot_app.config import config
8
+
9
def _worker_run_pickled(serialized_task: bytes, conn: Connection):
    """
    Worker entry point: deserialize a task, run it in a fresh asyncio loop and
    report the outcome through ``conn``.

    The reply is always written with ``conn.send_bytes``:
      * success: pickled ``("ok", result)``; if the result itself cannot be
        pickled, ``("ok", str(result))`` is sent instead;
      * failure: pickled ``("err", {"error": ..., "traceback": ...})``;
      * if even that cannot be sent, the raw bytes ``b'ERR:' + message``
        (not a pickle) are attempted as a last resort.

    The connection is closed before returning. The function sets the
    IS_ROBOT_APP_SUBPROCESS environment variable to 'true' and must stay at
    module top level so multiprocessing can use it as a process target.

    Args:
        serialized_task: Pickled coroutine object, callable, or plain value.
        conn: Connection used to send the reply.
    """
    import os

    # mark as a subprocess
    os.environ['IS_ROBOT_APP_SUBPROCESS'] = 'true'

    async def _execute(task):
        # Coroutine object: await it directly.
        if asyncio.iscoroutine(task):
            return await task
        # Neither awaitable nor callable: hand the value back untouched.
        if not callable(task):
            return task
        # Callable: invoke it and await the result when it is a coroutine.
        outcome = task()
        if asyncio.iscoroutine(outcome):
            outcome = await outcome
        return outcome

    try:
        if _pickler is None:
            raise RuntimeError("No pickler available in worker process.")

        task = _pickler.loads(serialized_task)
        # asyncio.run gives the task its own, fresh event loop.
        outcome = asyncio.run(_execute(task))
        try:
            message = _pickler.dumps(("ok", outcome))
        except Exception:
            # Result is not picklable: fall back to its string form.
            message = _pickler.dumps(("ok", str(outcome)))
        conn.send_bytes(message)
    except Exception as exc:
        # Report the failure with its traceback.
        try:
            trace = traceback.format_exc()
            conn.send_bytes(_pickler.dumps(("err", {"error": str(exc), "traceback": trace})))
        except Exception:
            # Last resort: plain-text marker instead of a pickle.
            try:
                conn.send_bytes(b'ERR:' + str(exc).encode("utf-8"))
            except Exception:
                pass
    finally:
        try:
            conn.close()
        except Exception:
            pass
63
async def _recv_from_connection_async(conn: Connection):
    """
    Await one bytes message from ``conn`` without blocking the event loop.

    The blocking ``conn.recv_bytes()`` call runs in the loop's default
    executor; it matches a sender that uses ``conn.send_bytes(payload)``.

    Args:
        conn: Readable end of a multiprocessing connection.

    Returns:
        bytes: The message read from the connection.

    Raises:
        Whatever ``conn.recv_bytes()`` raises is propagated to the awaiting caller.
    """
    # Inside a coroutine the running loop is the one to use;
    # get_running_loop() states that explicitly and never creates a loop.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, conn.recv_bytes)  # blocking call inside executor
70
def _start_subprocess_for_coroutine(coroutine_obj):
    """
    Try to start a subprocess that will run the provided coroutine/callable.

    The object is serialized with the module pickler and handed to
    ``_worker_run_pickled`` in a new, non-daemon process; the worker replies
    on a one-way pipe whose read end is returned to the caller.

    Args:
        coroutine_obj: Coroutine object or callable to execute in the worker.

    Returns:
        tuple: ``(process, parent_conn, True)`` when the worker was started, or
        ``(None, None, False)`` when the object cannot be serialized, so the
        caller can fall back to in-process execution.

    Raises:
        Any exception raised while creating or starting the process is
        re-raised after both pipe ends have been closed.
    """
    def _get_mp_start_method():
        """Get the multiprocessing start method.

        For Windows + Jupyter compatibility, 'spawn' is required.
        'spawn' guarantees that every worker starts fresh and doesn't carry
        Python heap or native allocations from the parent.
        'fork' gives faster startup and lower initial memory cost, but carries
        over everything in parent memory, including global variables and open
        resources: it can be unsafe with threads and async loops.

        Returns:
            str: The multiprocessing start method.
        """
        if sys.platform == "win32":
            return "spawn"
        return config.robot_task_mp_method

    try:
        serialized = _pickler.dumps(coroutine_obj)
    except Exception:
        # cannot serialize the coroutine/callable -> fall back to in-process
        return (None, None, False)

    parent_conn, child_conn = mp.Pipe(duplex=False)
    try:
        ctx = mp.get_context(_get_mp_start_method())
        p = ctx.Process(target=_worker_run_pickled, args=(serialized, child_conn), daemon=False)
        p.start()
    except Exception:
        # The worker never started: release both pipe ends so their
        # descriptors do not leak, then let the caller see the failure.
        for end in (parent_conn, child_conn):
            try:
                end.close()
            except Exception:
                pass
        raise

    # Close the parent's copy of the write end; only the worker should hold it.
    try:
        child_conn.close()
    except Exception:
        # best-effort: a failed close must not discard an already running worker
        pass
    return (p, parent_conn, True)