ws-bom-robot-app 0.0.5__py3-none-any.whl → 0.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,6 +11,8 @@ class Settings(BaseSettings):
11
11
  robot_data_db_folder_src: str = 'src'
12
12
  robot_data_db_folder_out: str = 'out'
13
13
  robot_data_db_folder_store: str = 'store'
14
+ robot_data_db_retention_days: float = 60
15
+ robot_task_retention_days: float = 1
14
16
  robot_cms_host: str = ''
15
17
  robot_cms_auth: str = ''
16
18
  robot_cms_db_folder: str = 'llmVectorDb'
@@ -0,0 +1,93 @@
1
+ import os
2
+ import threading
3
+ import schedule, time
4
+ import asyncio
5
+ from fastapi import APIRouter
6
+ from ws_bom_robot_app.task_manager import task_manager
7
+ from ws_bom_robot_app.llm.utils.kb import kb_cleanup_data_file
8
+ from ws_bom_robot_app.util import _log
9
+
10
class RecurringJob():
    """A job registered with the global `schedule` instance every `interval` seconds.

    Supports plain callables and coroutine functions. Coroutine jobs are run to
    completion via asyncio.run() in the scheduler thread — `schedule` itself
    only calls jobs synchronously, so an `async` callback would otherwise return
    a coroutine that is never awaited.
    """
    def __init__(self, interval: int, job_func, tags: list[str]):
        self.interval = interval  # seconds between runs
        # bug fix: job_func was assigned twice in the original
        self.job_func = job_func
        self.is_coroutine = asyncio.iscoroutinefunction(job_func)
        self.tags = tags  # schedule tags, used for group clear/list
    def run(self):
        """Register this job with the global schedule."""
        if self.is_coroutine:
            schedule.every(self.interval).seconds.do(self._run_async_job).tag(*self.tags)
        else:
            schedule.every(self.interval).seconds.do(self.job_func).tag(*self.tags)
    def _run_async_job(self):
        # bug fix: the original declared this method `async`, so schedule
        # received a coroutine object and never executed the job body.
        # The scheduler thread has no running loop, so asyncio.run is safe here.
        asyncio.run(self.job_func())
24
+
25
class CronManager:
    """Registry and background runner for the app's recurring jobs."""

    # default jobs keyed by name; tags allow group-wise clearing in `schedule`
    _list: dict[str, RecurringJob] = {
        'cleanup-task': RecurringJob(5*60, task_manager.cleanup_task, tags=["cleanup","cleanup-task"]),
        'cleanup-data': RecurringJob(180*60, kb_cleanup_data_file, tags=["cleanup","cleanup-data"]),
    }

    def __init__(self):
        self.jobs: dict[str, RecurringJob] = CronManager._list
        self.__scheduler_is_running = False
    def add_job(self, name: str, job: RecurringJob):
        """Register an extra job under `name`; returns the job.

        bug fix: `self.jobs` is a dict — the original wrapped the job in a
        one-entry dict and called `.append()`, which always raised
        AttributeError.
        """
        self.jobs[name] = job
        return job
    def run_pending(self):
        return schedule.run_pending()
    def run_all(self):
        return schedule.run_all()
    def clear(self):
        """Stop the background loop and drop every scheduled job."""
        self.__scheduler_is_running = False
        return schedule.clear()
    def get_jobs(self):
        return schedule.get_jobs()
    def start(self):
        """(Re)register all jobs and launch the scheduler loop in a daemon thread."""
        def _target():
            # ~1 minute tick; jobs use multi-minute intervals so this is enough
            while self.__scheduler_is_running:
                time.sleep(1)
                self.run_pending()
                time.sleep(59)
                _log.info(f"__scheduler_is_running={self.__scheduler_is_running}")
        # clear any previously scheduled jobs (also stops a running loop)
        self.clear()
        # prepare jobs
        for job in self.jobs.values():
            job.run()
        # start scheduler thread; no-op if somehow already running
        if not self.__scheduler_is_running:
            self.__scheduler_is_running = True
            t = threading.Thread(target=_target, args=(), daemon=True)
            t.start()
65
+
66
cron_manager = CronManager()

router = APIRouter(prefix="/api/cron", tags=["cron"])

@router.get("/list")
def _list():
    """Return a JSON-serializable snapshot of every scheduled job."""
    def __format(job: schedule.Job) -> dict:
        return {
            "job": {'module': job.job_func.__module__, 'name': job.job_func.__name__},
            "at": job.at_time,
            "interval": job.interval,
            "last_run": job.last_run,
            "next_run": job.next_run,
            "tags": job.tags}
    return [__format(j) for j in cron_manager.get_jobs()]

@router.get("/start")
def _start():
    """Start the background scheduler thread."""
    cron_manager.start()

@router.delete("/stop")
def _stop():
    """Stop the scheduler and clear all jobs."""
    return {"_": cron_manager.clear()}

@router.get("/run/pending")
def _run_pending():
    """Run any jobs that are currently due."""
    return {"_": cron_manager.run_pending()}

@router.get("/run/all")
def _run_all():
    """Force-run every registered job regardless of schedule."""
    return {"_": cron_manager.run_all()}
@@ -7,6 +7,7 @@ from ws_bom_robot_app.llm.main import invoke, stream, stream_none
7
7
  from ws_bom_robot_app.llm.vector_store.generator import kb, rules, kb_stream_file
8
8
  from ws_bom_robot_app.llm.tools.tool_manager import ToolManager
9
9
  from ws_bom_robot_app.llm.vector_store.integration.manager import IntegrationManager
10
+ from ws_bom_robot_app.task_manager import task_manager
10
11
 
11
12
  router = APIRouter(prefix="/api/llm", tags=["llm"])
12
13
 
@@ -38,6 +39,10 @@ async def _stream_raw_none(rq: StreamRequest) -> None:
38
39
  async def _kb(rq: KbRequest) -> VectorDbResponse:
39
40
  return await kb(rq)
40
41
 
42
@router.post("/kb/task")
async def _create_kb_task(rq: KbRequest):
    """Run the kb build as a background task; return its tracking id."""
    task_id = task_manager.create_task(kb(rq))
    return {"task_id": task_id}
45
+
41
46
  @router.post("/rules")
42
47
  def _rules(rq: RulesRequest) -> VectorDbResponse:
43
48
  return rules(rq)
@@ -1,10 +1,12 @@
1
- import json, os , uuid, requests
1
+ import aiofiles
2
+ import json, os , uuid, httpx
2
3
  from enum import Enum
3
4
  from typing import Optional, Literal
4
5
  from pydantic import BaseModel, Field, AliasChoices, ConfigDict
5
6
  from langchain_core.documents import Document
6
7
  from ws_bom_robot_app.llm.vector_store.loader.json_loader import JsonLoader
7
8
  from ws_bom_robot_app.util import timer
9
+ import asyncio
8
10
 
9
11
  class LlmKbIntegration(BaseModel):
10
12
  type: str = Field(..., validation_alias=AliasChoices("blockType","type"))
@@ -65,7 +67,7 @@ class LlmKbEndpoint(BaseModel):
65
67
  fields_mapping: LlmKbEndpointFieldsMapping = Field(validation_alias=AliasChoices("fieldsMapping","fields_mapping"))
66
68
 
67
69
  # Remapping Function
68
- def __remap_knowledgebase_file(filepath: str, mapping: LlmKbEndpointFieldsMapping) -> None:
70
+ async def __remap_knowledgebase_file(filepath: str, mapping: LlmKbEndpointFieldsMapping) -> None:
69
71
  map_new_fields = mapping.new_fields or []
70
72
  map_replaced_fields = mapping.replaced_fields or []
71
73
  deleted_fields = mapping.deleted_fields or []
@@ -73,7 +75,7 @@ def __remap_knowledgebase_file(filepath: str, mapping: LlmKbEndpointFieldsMappin
73
75
  if all([not map_new_fields,not map_replaced_fields,not deleted_fields]):
74
76
  return
75
77
 
76
- with open(filepath, 'r', encoding='utf8') as file:
78
+ async with aiofiles.open(filepath, 'r', encoding='utf8') as file:
77
79
  original_data = json.load(file)
78
80
 
79
81
  for item in original_data:
@@ -106,45 +108,102 @@ def __remap_knowledgebase_file(filepath: str, mapping: LlmKbEndpointFieldsMappin
106
108
  # New fields
107
109
  for field in map_new_fields:
108
110
  item[field.name] = field.value
111
+ async with aiofiles.open(filepath, 'w', encoding='utf8') as file:
112
+ await file.write(json.dumps(original_data, ensure_ascii=False, indent=4))
109
113
 
110
- with open(filepath, 'w', encoding='utf8') as file:
114
+ async with aiofiles.open(filepath, 'w', encoding='utf8') as file:
111
115
  json.dump(original_data, file, ensure_ascii=False, indent=4)
112
116
 
113
117
  # Download External Endpoints
114
- @timer
115
- def load_endpoints(endpoints: list[LlmKbEndpoint], destination_directory: str) -> list[Document]:
116
- _documents = []
117
-
118
- for endpoint in endpoints:
119
- headers = {}
120
- if endpoint.authentication != ExternalEndpointAuthentication.NONE:
121
- auth_formats = {
122
- ExternalEndpointAuthentication.BASIC: lambda secret: f'Basic {secret}',
123
- ExternalEndpointAuthentication.BEARER: lambda secret: f'Bearer {secret}',
124
- ExternalEndpointAuthentication.CUSTOM: lambda secret: secret
125
- }
126
- headers['Authorization'] = auth_formats[endpoint.authentication](endpoint.auth_secret)
127
- try:
128
- response = requests.get(endpoint.endpoint_url, headers=headers)
129
- response.raise_for_status()
130
-
131
- mime_type = response.headers.get('content-type', None)
132
- if mime_type.__contains__('application/json'):
133
- filename = f"{uuid.uuid4()}.json"
134
- file_path = os.path.join(destination_directory, filename)
135
- with open(file_path, 'wb') as file:
136
- file.write(response.content)
137
- __remap_knowledgebase_file(file_path, endpoint.fields_mapping)
138
- _documents.extend(
139
- JsonLoader(
140
- file_path,
141
- meta_fields=[field.name for field in endpoint.fields_mapping.meta_fields] if endpoint.fields_mapping.meta_fields else []
142
- ).load())
143
- else:
144
- raise Exception(f"Unsupported content type {mime_type}")
145
-
146
- except requests.exceptions.RequestException as e:
147
- raise Exception(f"Failed to download file from endpoint [status {response.status_code}]: {endpoint.endpoint_url}") from e
148
-
149
- return _documents
118
+ #@timer
119
async def load_endpoints(endpoints: list[LlmKbEndpoint], destination_directory: str) -> list[Document]:
    """Download every external endpoint concurrently and load the JSON payloads.

    Each endpoint is fetched with httpx, written to a temporary .json file in
    `destination_directory`, remapped per its fields_mapping, loaded through
    JsonLoader, and then removed.

    Raises:
        Exception: wrapping any HTTP, status or load failure; asyncio.gather
        propagates the first raised exception.
    """
    _documents: list[Document] = []

    async def process_endpoint(endpoint: LlmKbEndpoint):
        headers = {}
        if endpoint.authentication != ExternalEndpointAuthentication.NONE:
            auth_formats = {
                ExternalEndpointAuthentication.BASIC: lambda secret: f'Basic {secret}',
                ExternalEndpointAuthentication.BEARER: lambda secret: f'Bearer {secret}',
                ExternalEndpointAuthentication.CUSTOM: lambda secret: secret
            }
            headers['Authorization'] = auth_formats[endpoint.authentication](endpoint.auth_secret)
        try:
            async with httpx.AsyncClient() as client:
                response = await client.get(endpoint.endpoint_url, headers=headers)
                response.raise_for_status()

                # bug fix: content-type header may be absent -> original
                # `mime_type.__contains__(...)` crashed on None
                mime_type = response.headers.get('content-type') or ''
                if 'application/json' in mime_type:
                    filename = f"{uuid.uuid4()}.json"
                    file_path = os.path.join(destination_directory, filename)
                    async with aiofiles.open(file_path, 'wb') as file:
                        await file.write(response.content)
                    await __remap_knowledgebase_file(file_path, endpoint.fields_mapping)
                    try:
                        documents = await JsonLoader(
                            file_path,
                            meta_fields=[field.name for field in endpoint.fields_mapping.meta_fields] if endpoint.fields_mapping.meta_fields else []
                        ).load()
                        _documents.extend(documents)
                        # bug fix: `aiofiles.os` is not imported at module level
                        # (only `import aiofiles`); remove in a worker thread
                        await asyncio.to_thread(os.remove, file_path)
                    except Exception as e:
                        raise Exception(f"Failed to load documents from endpoint: {endpoint.endpoint_url} [{e}]") from e
                else:
                    raise Exception(f"Unsupported content type {mime_type}")
        except httpx.HTTPStatusError as e:
            raise Exception(f"Failed to download file from endpoint [status {e.response.status_code}]: {endpoint.endpoint_url}") from e
        except httpx.RequestError as e:
            # bug fix: formatting `e.request.content` can itself raise for
            # requests without a loaded body; report the error instead
            raise Exception(f"Failed to download file from endpoint: {endpoint.endpoint_url} [{e}]") from e
        except Exception as e:
            raise Exception(f"Failed to download file from endpoint: {endpoint.endpoint_url} [{e}]") from e

    await asyncio.gather(*[process_endpoint(endpoint) for endpoint in endpoints])

    return _documents
164
+
165
async def __remap_knowledgebase_file(filepath: str, mapping: LlmKbEndpointFieldsMapping) -> None:
    """Rewrite a downloaded knowledge-base JSON file in place per `mapping`.

    For every item: rename replaced fields, drop deleted fields, then add the
    constant-valued new fields. Dotted names ("a.b.c") address nested objects;
    an item missing any intermediate key is left untouched for that field.
    """
    def _resolve(container: dict, dotted: str):
        # Return (parent_obj, last_key) for a dotted path, or (None, last_key)
        # when an intermediate key is missing.
        # bug fix: the original kept calling .get() on the chain even after an
        # intermediate lookup returned None, raising AttributeError for paths
        # missing more than the final key.
        keys = dotted.split('.')
        last_key = keys.pop()
        obj = container
        for key in keys:
            obj = obj.get(key, None)
            if obj is None:
                break
        return obj, last_key

    map_new_fields = mapping.new_fields or []
    map_replaced_fields = mapping.replaced_fields or []
    deleted_fields = mapping.deleted_fields or []

    # nothing to do -> avoid a pointless read/rewrite of the file
    if all([not map_new_fields, not map_replaced_fields, not deleted_fields]):
        return

    async with aiofiles.open(filepath, 'r', encoding='utf8') as file:
        original_data = json.loads(await file.read())

    for item in original_data:
        # Replaced fields: rename src -> dest (nested when dotted)
        for field in map_replaced_fields:
            if '.' in field.src_name:
                obj, last_key = _resolve(item, field.src_name)
                if obj is not None:
                    obj[field.dest_name] = obj.pop(last_key, None)
            else:
                item[field.dest_name] = item.pop(field.src_name, None)

        # Deleted fields
        for field in deleted_fields:
            if '.' in field.name:
                obj, last_key = _resolve(item, field.name)
                if obj is not None:
                    obj.pop(last_key, None)
            else:
                item.pop(field.name, None)

        # New fields (constant values)
        for field in map_new_fields:
            item[field.name] = field.value

    async with aiofiles.open(filepath, 'w', encoding='utf8') as file:
        await file.write(json.dumps(original_data, ensure_ascii=False, indent=4))
150
209
 
@@ -11,11 +11,13 @@ class FaissHelper():
11
11
  _embedding_model = "text-embedding-3-small"
12
12
  _CACHE: dict[str, FAISS] = {}
13
13
  @staticmethod
14
- @timer
15
- def create(documents: list[Document],folder_path:str,api_key:SecretStr) -> None:
16
- FAISS.from_documents(documents, OpenAIEmbeddings(api_key=api_key, model=FaissHelper._embedding_model)).save_local(folder_path)
14
+ #@timer
15
async def create(documents: list[Document], folder_path: str, api_key: SecretStr) -> None:
    """Build a FAISS index from `documents` and persist it to `folder_path`.

    Embedding and saving are blocking, so both run in worker threads via
    asyncio.to_thread to keep the event loop responsive.
    """
    embeddings = OpenAIEmbeddings(api_key=api_key, model=FaissHelper._embedding_model)
    index = await asyncio.to_thread(FAISS.from_documents, documents, embeddings)
    await asyncio.to_thread(index.save_local, folder_path)
17
19
  @staticmethod
18
- @timer
20
+ #@timer
19
21
  def get_loader(folder_path:str,api_key:SecretStr) -> FAISS:
20
22
  """_summary_
21
23
 
@@ -36,7 +38,7 @@ class FaissHelper():
36
38
  return FaissHelper._CACHE[folder_path]
37
39
 
38
40
  @staticmethod
39
- @timer
41
+ #@timer
40
42
  def get_retriever(folder_path:str,api_key:SecretStr,search_type=str, search_kwargs= dict[str,Any]) -> VectorStoreRetriever:
41
43
  """_summary_
42
44
 
@@ -50,7 +52,7 @@ class FaissHelper():
50
52
  _faiss = FaissHelper.get_loader(folder_path,api_key)
51
53
  return _faiss.as_retriever(search_type=search_type, search_kwargs=search_kwargs)
52
54
  @staticmethod
53
- @atimer
55
+ #@atimer
54
56
  async def _combine_search(retrievers: list[VectorStoreRetriever], query:str) -> list[Document]:
55
57
  """_summary_
56
58
 
@@ -76,7 +78,7 @@ class FaissHelper():
76
78
  all_docs = _remove_duplicates([doc for docs in search_results for doc in docs])
77
79
  return all_docs
78
80
  @staticmethod
79
- @atimer
81
+ #@atimer
80
82
  async def invoke(folder_path:str,api_key:SecretStr, query:str, search_type=str, search_kwargs= dict[str,Any]) -> list[Document]:
81
83
  """_summary_
82
84
  Args:
@@ -0,0 +1,34 @@
1
+ import os
2
+ from ws_bom_robot_app.config import config
3
+ from datetime import datetime, timedelta
4
+ from ws_bom_robot_app.util import _log
5
+
6
def kb_cleanup_data_file() -> dict:
    """Clean up old generated db files under the 'out' data folder.

    Walks the folder bottom-up, deletes files older than
    `robot_data_db_retention_days`, then prunes any directories left empty.

    Returns:
        Dictionary with cleanup statistics:
        `deleted_files_count` and `freed_space_mb`.
    """
    _deleted_files = []
    _freed_space = 0
    folder = os.path.join(config.robot_data_folder, config.robot_data_db_folder, config.robot_data_db_folder_out)
    # hoisted out of the loop: one consistent cutoff for the whole scan
    # (the original recomputed datetime.now() per file)
    cutoff = datetime.now() - timedelta(days=config.robot_data_db_retention_days)

    for root, dirs, files in os.walk(folder, topdown=False):
        for file in files:
            file_path = os.path.join(root, file)
            file_stat = os.stat(file_path)
            # NOTE(review): st_ctime is metadata-change time on POSIX, not
            # creation time — files touched by chmod/rename look "new"; confirm
            # this is acceptable for retention purposes
            if datetime.fromtimestamp(file_stat.st_ctime) < cutoff:
                _freed_space += file_stat.st_size
                os.remove(file_path)
                _deleted_files.append(file_path)
        # bottom-up walk: safe to drop directories emptied above
        if not os.listdir(root):
            os.rmdir(root)

    _log.info(f"Deleted {len(_deleted_files)} files; Freed space: {_freed_space / (1024 * 1024):.2f} MB")

    return {
        "deleted_files_count": len(_deleted_files),
        "freed_space_mb": _freed_space / (1024 * 1024)
    }
@@ -1,4 +1,5 @@
1
- import os, shutil
1
+ import os, shutil, traceback
2
+ import asyncio
2
3
  from fastapi import HTTPException
3
4
  from fastapi.responses import StreamingResponse
4
5
  from langchain_openai import OpenAIEmbeddings
@@ -8,16 +9,16 @@ from ws_bom_robot_app.llm.vector_store.loader.base import Loader
8
9
  from ws_bom_robot_app.llm.models.api import RulesRequest, KbRequest, VectorDbResponse
9
10
  from ws_bom_robot_app.llm.vector_store.integration.manager import IntegrationManager
10
11
  from ws_bom_robot_app.llm.utils.faiss_helper import FaissHelper
11
- from ws_bom_robot_app.util import timer,atimer
12
+ from ws_bom_robot_app.util import timer
12
13
 
13
14
  @timer
14
- def rules(rq: RulesRequest) -> VectorDbResponse:
15
+ async def rules(rq: RulesRequest) -> VectorDbResponse:
15
16
  api_key = rq.api_key()
16
17
  _config = rq.config()
17
18
  db_name = rq.out_name()
18
19
  store_path = os.path.join(_config.robot_data_folder, _config.robot_data_db_folder, _config.robot_data_db_folder_store, db_name)
19
20
  try:
20
- FaissHelper.create([Document(page_content=rule, metadata={"source": "rules"}) for rule in rq.rules], store_path, api_key) #type: ignore
21
+ await FaissHelper.create([Document(page_content=rule, metadata={"source": "rules"}) for rule in rq.rules], store_path, api_key) #type: ignore
21
22
  #FAISS.from_texts([json.dumps({"rule": rule}) for rule in rq.rules], OpenAIEmbeddings(api_key=api_key)).save_local(store_path) #type: ignore
22
23
  db_file_path = shutil.make_archive(os.path.join(_config.robot_data_folder, _config.robot_data_db_folder, _config.robot_data_db_folder_out, db_name), "zip", store_path)
23
24
  return VectorDbResponse(file = os.path.basename(db_file_path))
@@ -25,8 +26,10 @@ def rules(rq: RulesRequest) -> VectorDbResponse:
25
26
  os.removedirs(store_path)
26
27
  return VectorDbResponse(success = False, error = str(e))
27
28
 
28
- @atimer
29
- async def kb(rq: KbRequest):
29
+ #@atimer
30
+ async def kb(rq: KbRequest) -> VectorDbResponse:
31
+ import aiofiles
32
+ import aiofiles.os
30
33
  os.environ['MPLCONFIGDIR'] = './tmp/.matplotlib'
31
34
  from ws_bom_robot_app.llm.models.kb import load_endpoints
32
35
  from ws_bom_robot_app.llm.utils.download import download_files
@@ -39,8 +42,8 @@ async def kb(rq: KbRequest):
39
42
  if all([not rq.files,not rq.endpoints,not rq.integrations]):
40
43
  return VectorDbResponse(success = False, error = "No files, endpoints or integrations provided")
41
44
  else:
42
- os.makedirs(src_path, exist_ok=True)
43
- os.makedirs(working_path, exist_ok=True)
45
+ await aiofiles.os.makedirs(src_path, exist_ok=True)
46
+ await aiofiles.os.makedirs(working_path, exist_ok=True)
44
47
 
45
48
  loaders = Loader(working_path)
46
49
  filter_file_extensions = loaders.managed_file_extensions()
@@ -55,7 +58,9 @@ async def kb(rq: KbRequest):
55
58
  src_path, authorization=_config.robot_cms_auth)
56
59
  # copy files to working tmp folder
57
60
  for file in rq.files:
58
- shutil.copy(os.path.join(src_path, os.path.basename(file)), os.path.join(working_path, os.path.basename(file)))
61
+ async with aiofiles.open(os.path.join(src_path, os.path.basename(file)), 'rb') as src_file:
62
+ async with aiofiles.open(os.path.join(working_path, os.path.basename(file)), 'wb') as dest_file:
63
+ await dest_file.write(await src_file.read())
59
64
  except Exception as e:
60
65
  return VectorDbResponse(success = False, error = f"Failed to download file {e}")
61
66
 
@@ -63,34 +68,42 @@ async def kb(rq: KbRequest):
63
68
 
64
69
  if rq.endpoints:
65
70
  try:
66
- documents.extend(load_endpoints(rq.endpoints, working_path))
71
+ documents.extend(await load_endpoints(rq.endpoints, working_path))
67
72
  except Exception as e:
68
- return VectorDbResponse(success = False, error = f"{e}")
73
+ tb = traceback.format_exc()
74
+ return VectorDbResponse(success = False, error = f"Endpoint failure: {e} | {tb}")
69
75
 
70
76
  if rq.integrations:
77
+ tasks = []
71
78
  for integration in rq.integrations:
72
- try:
73
- documents.extend(IntegrationManager
74
- .get_strategy(integration.type.lower(),working_path,integration.__pydantic_extra__) #type: ignore
75
- .load()
76
- )
77
- except Exception as e:
78
- return VectorDbResponse(success = False, error = f"Integration failure: {e}")
79
+ tasks.append(
80
+ IntegrationManager
81
+ .get_strategy(integration.type.lower(), working_path, integration.__pydantic_extra__) #type: ignore
82
+ .load()
83
+ )
84
+ try:
85
+ integration_documents = await asyncio.gather(*tasks)
86
+ for docs in integration_documents:
87
+ documents.extend(docs)
88
+ except Exception as e:
89
+ tb = traceback.format_exc()
90
+ return VectorDbResponse(success=False, error=f"Integration failure: {e} | {tb}")
79
91
 
80
92
  try:
81
- documents.extend(loaders.load())
93
+ documents.extend(await loaders.load())
82
94
  except Exception as e:
83
- return VectorDbResponse(success = False, error = f"Failed to load files {e}")
95
+ tb = traceback.format_exc()
96
+ return VectorDbResponse(success = False, error = f"Failed to load files {e} | {tb}")
84
97
  finally:
85
- shutil.rmtree(working_path)
98
+ await aiofiles.os.wrap(shutil.rmtree)(working_path)
86
99
 
87
100
  if documents and len(documents) > 0:
88
101
  try:
89
102
  store_path = os.path.join(_config.robot_data_folder, _config.robot_data_db_folder, _config.robot_data_db_folder_store, db_name)
90
103
  # create db
91
- FaissHelper.create(documents,store_path,api_key) #type: ignore
104
+ await FaissHelper.create(documents,store_path,api_key) #type: ignore
92
105
  # zip db
93
- db_file_path = shutil.make_archive(os.path.join(_config.robot_data_folder, _config.robot_data_db_folder, _config.robot_data_db_folder_out, db_name), "zip", store_path)
106
+ db_file_path = await aiofiles.os.wrap(shutil.make_archive)(os.path.join(_config.robot_data_folder, _config.robot_data_db_folder, _config.robot_data_db_folder_out, db_name), "zip", store_path)
94
107
  return VectorDbResponse(file = os.path.basename(db_file_path))
95
108
  except Exception as e:
96
109
  return VectorDbResponse(success = False, error = str(e))
@@ -99,6 +112,7 @@ async def kb(rq: KbRequest):
99
112
 
100
113
  async def kb_stream_file(filename: str):
101
114
  from ws_bom_robot_app.config import config
115
+
102
116
  file_path = os.path.join(config.robot_data_folder, config.robot_data_db_folder, config.robot_data_db_folder_out, filename)
103
117
  if not os.path.isfile(file_path):
104
118
  raise HTTPException(status_code=404, detail="File not found")
@@ -14,6 +14,6 @@ class IntegrationStrategy(ABC):
14
14
  def working_subdirectory(self) -> str:
15
15
  pass
16
16
  @abstractmethod
17
- @timer
17
+ #@timer
18
18
  def load(self) -> list[Document]:
19
19
  pass
@@ -1,3 +1,5 @@
1
+ import aiofiles
2
+ import aiofiles.os
1
3
  from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy
2
4
  from langchain_community.document_loaders.sitemap import SitemapLoader
3
5
  from langchain_community.document_transformers import MarkdownifyTransformer as markdownify
@@ -53,7 +55,7 @@ class Sitemap(IntegrationStrategy):
53
55
  for _ in content.select(element):
54
56
  _.decompose()
55
57
  return str(self._extract(content))
56
- def load(self) -> list[Document]:
58
+ async def load(self) -> list[Document]:
57
59
  def _is_local(url: str) -> bool:
58
60
  return not url.startswith("http")
59
61
  def _remap_if_local(url: str) -> str:
@@ -66,8 +68,8 @@ class Sitemap(IntegrationStrategy):
66
68
  parsing_function=self._parse,
67
69
  is_local=_is_local(self.__sitemap_url)
68
70
  )
69
- _docs = self._output(_loader.load())
71
+ _docs = self._output([document async for document in _loader.alazy_load()])
70
72
  if _is_local(self.__sitemap_url):
71
- os.remove(_loader.web_path)
73
+ await aiofiles.os.remove(_loader.web_path)
72
74
  return _docs
73
75
  return []
@@ -1,4 +1,5 @@
1
1
 
2
+ import asyncio
2
3
  from ws_bom_robot_app.config import config
3
4
  from typing import Any, Callable, Generator, Optional, Tuple
4
5
  from langchain_community.document_loaders import DirectoryLoader
@@ -72,7 +73,7 @@ class Loader():
72
73
  def managed_file_extensions() -> list[str]:
73
74
  return [k for k,v in Loader._list.items() if v is not None]
74
75
 
75
- @timer
76
+ #@timer
76
77
  def __directory_loader(self) -> list[DirectoryLoader]:
77
78
  loader_configs = {}
78
79
  for ext, loader_config in Loader._list.items():
@@ -88,6 +89,7 @@ class Loader():
88
89
  }
89
90
  loader_configs[loader_key]["glob_patterns"].append(f"**/*{ext}")
90
91
  loaders = []
92
+
91
93
  for loader_config in loader_configs.values():
92
94
  loaders.append(
93
95
  DirectoryLoader(
@@ -104,7 +106,7 @@ class Loader():
104
106
  )
105
107
  return loaders
106
108
 
107
- @timer
108
- def load(self) -> list[Document]:
109
+ #@timer
110
async def load(self) -> list[Document]:
    """Load every managed document; blocking loader work runs off the event loop."""
    merged = MergedDataLoader(self.__directory_loader())
    return await asyncio.to_thread(merged.load)
@@ -2,6 +2,7 @@ import json
2
2
  from typing import Optional
3
3
  from langchain_core.documents import Document
4
4
  from langchain_community.document_loaders.base import BaseLoader
5
+ import aiofiles
5
6
 
6
7
  class JsonLoader(BaseLoader):
7
8
  def __init__(self, file_path: str, meta_fields:Optional[list[str]] = [],encoding: Optional[str] = "utf-8"):
@@ -9,16 +10,17 @@ class JsonLoader(BaseLoader):
9
10
  self.meta_fields = meta_fields
10
11
  self.encoding = encoding
11
12
 
12
- def load(self) -> list[Document]:
13
- with open(self.file_path, "r", encoding=self.encoding) as file:
14
- data = json.load(file)
15
- _list = data if isinstance(data, list) else [data]
13
async def load(self) -> list[Document]:
    """Read the JSON file and wrap each item as a Document.

    A non-list payload is treated as a single-item list. Each requested
    meta field is copied into metadata only when present and truthy.
    """
    async with aiofiles.open(self.file_path, "r", encoding=self.encoding) as file:
        data = json.loads(await file.read())
    items = data if isinstance(data, list) else [data]
    documents = []
    for item in items:
        metadata = {"source": self.file_path}
        for field in self.meta_fields:
            value = item.get(field)
            if value:
                metadata[field] = value
        documents.append(Document(page_content=json.dumps(item), metadata=metadata))
    return documents
ws_bom_robot_app/main.py CHANGED
@@ -7,12 +7,18 @@ from fastapi.openapi.utils import get_openapi
7
7
  from ws_bom_robot_app.auth import authenticate
8
8
  from ws_bom_robot_app.config import config
9
9
  from ws_bom_robot_app.llm.api import router as llm
10
+ from ws_bom_robot_app.task_manager import router as task
11
+ from ws_bom_robot_app.cron_manager import (
12
+ router as cron,
13
+ cron_manager)
10
14
  from ws_bom_robot_app.util import _log
11
15
 
12
16
  _uptime = datetime.datetime.now()
17
+ cron_manager.start()
13
18
  app = FastAPI(redoc_url=None,docs_url=None,openapi_url=None)
14
-
15
19
  app.include_router(llm,dependencies=[Depends(authenticate)])
20
+ app.include_router(task,dependencies=[Depends(authenticate)])
21
+ app.include_router(cron,dependencies=[Depends(authenticate)])
16
22
 
17
23
  @app.get("/")
18
24
  async def root():
@@ -0,0 +1,120 @@
1
+ import asyncio, os
2
+ from datetime import datetime, timedelta
3
+ from enum import Enum
4
+ from typing import TypeVar, Optional, Dict, Union
5
+ from pydantic import BaseModel, ConfigDict, computed_field
6
+ from uuid import uuid4
7
+ from fastapi import APIRouter, HTTPException
8
+ from ws_bom_robot_app.config import config
9
+
10
+ T = TypeVar('T')
11
+
12
class TaskMetaData(BaseModel):
    """Timing and provenance info attached to a task's status."""
    start_time: datetime
    end_time: Optional[datetime] = None
    @computed_field
    @property
    def elapsed_time(self) -> Union[timedelta, None]:
        """Wall-clock duration; uses now() while the task is still running."""
        end = self.end_time if self.end_time else datetime.now()
        return end - self.start_time
    source: Optional[str] = None  # repr of the underlying asyncio.Task
    pid: Optional[int] = None     # worker process id
    model_config = ConfigDict(
        json_encoders={
            datetime: lambda v: v.isoformat(),
            timedelta: lambda v: str(v)
        }
    )
27
+
28
class TaskStatus(BaseModel):
    """Public status payload returned by the /api/task endpoints."""
    class TaskStatusEnum(str, Enum):
        pending = "pending"
        completed = "completed"
        failure = "failure"
    task_id: str
    status: TaskStatusEnum
    # NOTE(review): the model is not Generic, so the bare TypeVar is effectively
    # unconstrained — consider declaring Optional[Any] explicitly
    result: Optional[T] = None
    # bug fix: a None default requires an Optional annotation
    metadata: Optional[TaskMetaData] = None
    error: Optional[str] = None
39
+
40
class TaskEntry(BaseModel):
    """Internal registry record pairing an asyncio.Task with its status."""
    id: str
    task: Union[asyncio.Task, None] = None
    # bug fix: a None default requires an Optional annotation
    status: Optional[TaskStatus] = None
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # seed a pending status as soon as the entry is created
        self.status = TaskStatus(
            task_id=self.id,
            status=TaskStatus.TaskStatusEnum.pending,
            metadata=TaskMetaData(start_time=datetime.now(), source=str(self.task), pid=os.getpid())
        )
    model_config = ConfigDict(
        arbitrary_types_allowed=True  # asyncio.Task is not a pydantic type
    )
54
+
55
class TaskManager:
    """In-memory registry of background asyncio tasks keyed by uuid."""
    def __init__(self):
        self.tasks: Dict[str, TaskEntry] = {}

    def _task_done_callback(self, task_id: str):
        """Build the done-callback that records result/error on completion."""
        def callback(task: asyncio.Task):
            if _task := self.tasks.get(task_id):
                try:
                    # .result() re-raises the task's exception, if any
                    result = _task.task.result()
                    _task.status.status = TaskStatus.TaskStatusEnum.completed
                    _task.status.result = result
                except Exception as e:
                    _task.status.status = TaskStatus.TaskStatusEnum.failure
                    _task.status.error = str(e)
                finally:
                    _task.status.metadata.end_time = datetime.now()
        return callback

    def create_task(self, coroutine) -> str:
        """Schedule `coroutine` on the running loop; return its tracking id.

        bug fix: the original annotated the parameter as `asyncio.coroutines`,
        which is a module, not a type; the argument is any awaitable accepted
        by asyncio.create_task().
        """
        _task = asyncio.create_task(coroutine)
        entry = TaskEntry(id=str(uuid4()), task=_task)
        entry.task.add_done_callback(self._task_done_callback(entry.id))
        self.tasks[entry.id] = entry
        return entry.id

    def get_task(self, task_id: str) -> TaskEntry | None:
        """Return the entry for `task_id`, or None when unknown."""
        return self.tasks.get(task_id)

    def remove_task(self, task_id: str) -> None:
        """Drop the entry; safe to call with an unknown id."""
        self.tasks.pop(task_id, None)

    def cleanup_task(self):
        """Purge finished tasks older than `robot_task_retention_days`."""
        cutoff = datetime.now() - timedelta(days=config.robot_task_retention_days)
        finished = {TaskStatus.TaskStatusEnum.completed, TaskStatus.TaskStatusEnum.failure}
        stale = [task_id for task_id, task in self.tasks.items()
                 if task.status.status in finished
                 and task.status.metadata.end_time is not None  # defensive: callback sets it
                 and task.status.metadata.end_time < cutoff]
        for task_id in stale:
            self.remove_task(task_id)
94
+
95
# global instance
task_manager = TaskManager()

router = APIRouter(prefix="/api/task", tags=["task"])

@router.get("/status/{task_id}", response_model=TaskStatus)
async def _status_task(task_id: str) -> TaskStatus:
    """Return the status of a single task, or 404 when unknown."""
    task = task_manager.get_task(task_id)
    if not task:
        raise HTTPException(status_code=404, detail="Task not found")
    return task.status

@router.get("/status")
async def _status_task_list():
    """Return the status of every known task."""
    return [task_manager.get_task(task.id).status for task in task_manager.tasks.values()]

@router.delete("/status/{task_id}")
async def _remove_task(task_id: str):
    """Forget a task; no error for unknown ids."""
    task_manager.remove_task(task_id)
    return {"success":"ok"}

@router.delete("/cleanup")
async def _remove_task_list():
    """Purge finished tasks past the retention window."""
    task_manager.cleanup_task()
    return {"success":"ok"}
120
+
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ws_bom_robot_app
3
- Version: 0.0.5
3
+ Version: 0.0.7
4
4
  Summary: A FastAPI application serving ws bom/robot/llm platform ai.
5
5
  Home-page: https://github.com/websolutespa/bom
6
6
  Author: Websolute Spa
@@ -10,6 +10,8 @@ Classifier: License :: OSI Approved :: MIT License
10
10
  Classifier: Operating System :: OS Independent
11
11
  Requires-Python: >=3.12
12
12
  Description-Content-Type: text/markdown
13
+ Requires-Dist: schedule
14
+ Requires-Dist: aiofiles
13
15
  Requires-Dist: pydantic==2.9.2
14
16
  Requires-Dist: pydantic-settings==2.6.0
15
17
  Requires-Dist: fastapi[standard]==0.115.5
@@ -1,19 +1,21 @@
1
1
  ws_bom_robot_app/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  ws_bom_robot_app/auth.py,sha256=84nIbmJsMrNs0sxIQGEHbjsjc2P6ZrZZGSn8dkiL6is,895
3
- ws_bom_robot_app/config.py,sha256=ITbrJzlBmr11hgzmyQMUAubNbX_H_xscOgY5u4rdtDA,1743
4
- ws_bom_robot_app/main.py,sha256=eqGA4wK-MVhnZMg97KLNRYP5-RfMEQwrvVLY1hkz5AU,3144
3
+ ws_bom_robot_app/config.py,sha256=VUoE6MBpYnZ8FQvUjWOLCzfs35aUvpwrz4ROFVbO3AA,1831
4
+ ws_bom_robot_app/cron_manager.py,sha256=p0l0ybwfNhPn-R0kr3sq4rmR9VW3XDrsOu1BgvapYKc,3163
5
+ ws_bom_robot_app/main.py,sha256=OZ0s2L2fKEwH46Vnt5KGEbuq-udEJH4WsinR2Ws8B4E,3433
6
+ ws_bom_robot_app/task_manager.py,sha256=O6ZiQto0SxqZWja8bVc-Sok5roT_ja-uOISDYs0oBGE,4284
5
7
  ws_bom_robot_app/util.py,sha256=3aBK-bhsvKJwJeWOHh0c1B1BOyJ_tnUxOa1mJmFKwYQ,2618
6
8
  ws_bom_robot_app/llm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
9
  ws_bom_robot_app/llm/agent_description.py,sha256=9gK7MfsPvk7YTvk-bbLU_r4ygoEUUrepKxuj9zLxsgI,4681
8
10
  ws_bom_robot_app/llm/agent_handler.py,sha256=hbOf9i-ynDL3bcClqtUG-yWY8ohbCxONfT5ek9Cv0gY,5667
9
11
  ws_bom_robot_app/llm/agent_lcel.py,sha256=FzRt_u-zC78-DLS9cigO2oA6bsX6SOERuAFbevSXpT8,2701
10
- ws_bom_robot_app/llm/api.py,sha256=CyvBwyg6Wrxi0esMcTob8F3babuYJLr5huypLP0XpbQ,2695
12
+ ws_bom_robot_app/llm/api.py,sha256=A1IFiiBLHdCJsFDllWTAdQEgkRm5a0BjU-oKozxlhiw,2880
11
13
  ws_bom_robot_app/llm/defaut_prompt.py,sha256=pn5a4lNLWE1NngHYjA_7tD8GasePMgsgude5fIJxsW0,756
12
14
  ws_bom_robot_app/llm/main.py,sha256=Q3A_Vgz09fhj78_PNVpDL9VQCYQd2lW_hmfyEKa_17Q,3667
13
15
  ws_bom_robot_app/llm/settings.py,sha256=EkFGCppORenStH9W4e6_dYvQ-5p6xiEMpmUHBqNqG9M,117
14
16
  ws_bom_robot_app/llm/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
17
  ws_bom_robot_app/llm/models/api.py,sha256=Bi9YwNBJExoZzPVCerAoweWCY9zGAF9rn_N_aBNKmrU,6922
16
- ws_bom_robot_app/llm/models/kb.py,sha256=TqEAxMvXl0lyQhdojkRPXF_TW_zgtPf-uycGFyRXDsw,6612
18
+ ws_bom_robot_app/llm/models/kb.py,sha256=EqC9M_nQzDKX9ttL5QG2d04aH19sh-svJdw1MinvGfk,8925
17
19
  ws_bom_robot_app/llm/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
20
  ws_bom_robot_app/llm/tools/tool_builder.py,sha256=rkYu0PrXV84PMi7INjCSWlrWMykUCI8aeF-QjZgLysM,854
19
21
  ws_bom_robot_app/llm/tools/tool_manager.py,sha256=y4K1NiDsVbdZjk2xUEK_T6j-4fHmV5QY02j5tHcoBRs,3708
@@ -23,18 +25,19 @@ ws_bom_robot_app/llm/tools/models/main.py,sha256=LsOJ7vkcSzYLoE1oa3TG0Rs0pr9J5VS
23
25
  ws_bom_robot_app/llm/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
24
26
  ws_bom_robot_app/llm/utils/agent_utils.py,sha256=vdX_1Kxn6RcBljOSBlee1avi_0TyIk-rm0RGGeLt468,707
25
27
  ws_bom_robot_app/llm/utils/download.py,sha256=iAUxH_NiCpTPtGzhC4hBtxotd2HPFt2MBhttslIxqiI,3194
26
- ws_bom_robot_app/llm/utils/faiss_helper.py,sha256=WJN1sljdLWHXISk7aabVsfu6uZJpE6B7JaPS8l6UQCg,4517
28
+ ws_bom_robot_app/llm/utils/faiss_helper.py,sha256=DowmroVT6eIbvnA-TG84PS_D7ujvxSRIKdLuIcJmd6Q,4650
29
+ ws_bom_robot_app/llm/utils/kb.py,sha256=jja45WCbNI7SGEgqDS99nErlwB5eY8Ga7BMnhdMHZ90,1279
27
30
  ws_bom_robot_app/llm/utils/print.py,sha256=bpLWY0KHXe7x7PWcWy8NS54ZWzHY8b4jrLRkpnDl108,818
28
31
  ws_bom_robot_app/llm/vector_store/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
29
- ws_bom_robot_app/llm/vector_store/generator.py,sha256=yKIlSuaQKjnRrg8ut0_FZ0iF2zv8XCiQzawxpIsl7Qg,5210
32
+ ws_bom_robot_app/llm/vector_store/generator.py,sha256=_39HONBDSnIVOECoxyVHStg2XX4-m9Enm4pECm7wRBI,5816
30
33
  ws_bom_robot_app/llm/vector_store/integration/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
31
- ws_bom_robot_app/llm/vector_store/integration/base.py,sha256=EHGAF8W12XC6DIAFN-Qd3jCBAbq7yKxTk086cdg8e9E,625
34
+ ws_bom_robot_app/llm/vector_store/integration/base.py,sha256=eCKD3U0KPoVDMtKr2iZqauMFEKd9b2k6rqPG_YjDy0g,626
32
35
  ws_bom_robot_app/llm/vector_store/integration/manager.py,sha256=cSFlE2erMv3Uchy788mlCFdcvmyeoqdeIiGmJ9QbLhY,583
33
- ws_bom_robot_app/llm/vector_store/integration/sitemap.py,sha256=h2NhPv7k3p7wApUixq-zC4zMHz_4RLekigFqfst0bWg,3962
36
+ ws_bom_robot_app/llm/vector_store/integration/sitemap.py,sha256=bdrgHBVB8Jt3xiRuomSIgJgnyEXoMJWdq4SvYPY_chA,4059
34
37
  ws_bom_robot_app/llm/vector_store/loader/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
35
- ws_bom_robot_app/llm/vector_store/loader/base.py,sha256=U9vxv7-CxLNGNAvdodcbhpOjaByxbXByt6EmZtA6Jjc,4487
36
- ws_bom_robot_app/llm/vector_store/loader/json_loader.py,sha256=2508f7RojpXCNd9hS2GC_wzXrVYsTiojsqrAJnxUvnA,829
37
- ws_bom_robot_app-0.0.5.dist-info/METADATA,sha256=_kTHKiXVLdx1uSil7npNBirDKFvIyK-uWB5UDyxwQps,5558
38
- ws_bom_robot_app-0.0.5.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
39
- ws_bom_robot_app-0.0.5.dist-info/top_level.txt,sha256=Yl0akyHVbynsBX_N7wx3H3ZTkcMLjYyLJs5zBMDAKcM,17
40
- ws_bom_robot_app-0.0.5.dist-info/RECORD,,
38
+ ws_bom_robot_app/llm/vector_store/loader/base.py,sha256=YKtJWTVQuOvRR2CLYTa0bGEbjqgmYVVud1YKQ9QLibc,4536
39
+ ws_bom_robot_app/llm/vector_store/loader/json_loader.py,sha256=G9BoxwsevgqL72h2n28O2LpzaCYNymBkX66wru9GkCw,884
40
+ ws_bom_robot_app-0.0.7.dist-info/METADATA,sha256=6CJ3Dxk0F_0N00bVKmu79zTkeP6n5EnyHkCUdBs0FKw,5608
41
+ ws_bom_robot_app-0.0.7.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
42
+ ws_bom_robot_app-0.0.7.dist-info/top_level.txt,sha256=Yl0akyHVbynsBX_N7wx3H3ZTkcMLjYyLJs5zBMDAKcM,17
43
+ ws_bom_robot_app-0.0.7.dist-info/RECORD,,