ws-bom-robot-app 0.0.17__tar.gz → 0.0.19__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/PKG-INFO +5 -1
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/setup.py +1 -1
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/llm/utils/faiss_helper.py +19 -6
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/llm/vector_store/generator.py +35 -28
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/llm/vector_store/integration/confluence.py +2 -3
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/llm/vector_store/integration/github.py +1 -3
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/llm/vector_store/integration/jira.py +1 -3
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/llm/vector_store/loader/base.py +16 -3
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/requirements.txt +4 -0
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/task_manager.py +5 -5
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/util.py +26 -0
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app.egg-info/PKG-INFO +5 -1
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app.egg-info/requires.txt +4 -0
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/README.md +0 -0
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/pyproject.toml +0 -0
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/setup.cfg +0 -0
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/__init__.py +0 -0
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/auth.py +0 -0
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/config.py +0 -0
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/cron_manager.py +0 -0
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/llm/__init__.py +0 -0
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/llm/agent_description.py +0 -0
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/llm/agent_handler.py +0 -0
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/llm/agent_lcel.py +0 -0
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/llm/api.py +0 -0
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/llm/defaut_prompt.py +0 -0
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/llm/main.py +0 -0
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/llm/models/__init__.py +0 -0
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/llm/models/api.py +0 -0
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/llm/models/base.py +0 -0
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/llm/models/kb.py +0 -0
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/llm/settings.py +0 -0
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/llm/tools/__init__.py +0 -0
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/llm/tools/models/__init__.py +0 -0
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/llm/tools/models/main.py +0 -0
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/llm/tools/tool_builder.py +0 -0
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/llm/tools/tool_manager.py +0 -0
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/llm/tools/utils.py +0 -0
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/llm/utils/__init__.py +0 -0
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/llm/utils/agent_utils.py +0 -0
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/llm/utils/download.py +0 -0
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/llm/utils/kb.py +0 -0
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/llm/utils/print.py +0 -0
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/llm/utils/webhooks.py +0 -0
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/llm/vector_store/__init__.py +0 -0
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/llm/vector_store/integration/__init__.py +0 -0
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/llm/vector_store/integration/base.py +0 -0
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/llm/vector_store/integration/manager.py +0 -0
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/llm/vector_store/integration/sitemap.py +0 -0
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/llm/vector_store/loader/__init__.py +0 -0
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/llm/vector_store/loader/json_loader.py +0 -0
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/main.py +0 -0
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app.egg-info/SOURCES.txt +0 -0
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app.egg-info/dependency_links.txt +0 -0
- {ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: ws_bom_robot_app
|
|
3
|
-
Version: 0.0.17
|
|
3
|
+
Version: 0.0.19
|
|
4
4
|
Summary: A FastAPI application serving ws bom/robot/llm platform ai.
|
|
5
5
|
Home-page: https://github.com/websolutespa/bom
|
|
6
6
|
Author: Websolute Spa
|
|
@@ -25,6 +25,10 @@ Requires-Dist: python-magic==0.4.27
|
|
|
25
25
|
Requires-Dist: opencv-python-headless==4.10.0.84
|
|
26
26
|
Requires-Dist: unstructured[all-docs]==0.15.14
|
|
27
27
|
Requires-Dist: langchain_unstructured==0.1.5
|
|
28
|
+
Requires-Dist: unstructured-ingest==0.3.8
|
|
29
|
+
Requires-Dist: unstructured-ingest[confluence]
|
|
30
|
+
Requires-Dist: unstructured-ingest[github]
|
|
31
|
+
Requires-Dist: unstructured-ingest[jira]
|
|
28
32
|
Requires-Dist: html5lib==1.1
|
|
29
33
|
Requires-Dist: markdownify==0.14.1
|
|
30
34
|
Requires-Dist: nebuly==0.3.33
|
|
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
|
|
|
2
2
|
|
|
3
3
|
setup(
|
|
4
4
|
name="ws_bom_robot_app",
|
|
5
|
-
version="0.0.17",
|
|
5
|
+
version="0.0.19",
|
|
6
6
|
description="A FastAPI application serving ws bom/robot/llm platform ai.",
|
|
7
7
|
long_description=open("README.md", encoding='utf-8').read(),
|
|
8
8
|
long_description_content_type="text/markdown",
|
{ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/llm/utils/faiss_helper.py
RENAMED
|
@@ -3,19 +3,32 @@ from langchain_core.documents import Document
|
|
|
3
3
|
from langchain_core.vectorstores.base import VectorStoreRetriever
|
|
4
4
|
from langchain_openai import OpenAIEmbeddings
|
|
5
5
|
from typing import Any
|
|
6
|
-
import asyncio
|
|
6
|
+
import asyncio, gc, logging
|
|
7
7
|
from pydantic import SecretStr
|
|
8
|
-
from ws_bom_robot_app.util import timer,atimer
|
|
9
8
|
|
|
10
9
|
class FaissHelper():
|
|
11
10
|
_embedding_model = "text-embedding-3-small"
|
|
12
11
|
_CACHE: dict[str, FAISS] = {}
|
|
13
12
|
@staticmethod
|
|
14
13
|
#@timer
|
|
15
|
-
async def create(documents: list[Document], folder_path: str, api_key: SecretStr) -> None:
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
14
|
+
async def create(documents: list[Document], folder_path: str, api_key: SecretStr, return_folder_path:bool = False) -> str | None:
|
|
15
|
+
try:
|
|
16
|
+
embeddings = OpenAIEmbeddings(api_key=api_key, model=FaissHelper._embedding_model)
|
|
17
|
+
faiss_instance = await asyncio.to_thread(FAISS.from_documents, documents, embeddings)
|
|
18
|
+
await asyncio.to_thread(faiss_instance.save_local, folder_path)
|
|
19
|
+
del faiss_instance, embeddings
|
|
20
|
+
gc.collect()
|
|
21
|
+
if return_folder_path:
|
|
22
|
+
return folder_path
|
|
23
|
+
return None
|
|
24
|
+
except Exception as e:
|
|
25
|
+
logging.error(f"Failed to create Faiss instance: {e}")
|
|
26
|
+
return None
|
|
27
|
+
finally:
|
|
28
|
+
if 'documents' in locals():
|
|
29
|
+
del documents
|
|
30
|
+
gc.collect()
|
|
31
|
+
|
|
19
32
|
@staticmethod
|
|
20
33
|
#@timer
|
|
21
34
|
def get_loader(folder_path:str,api_key:SecretStr) -> FAISS:
|
{ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/llm/vector_store/generator.py
RENAMED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import os, shutil, traceback
|
|
2
|
-
import asyncio
|
|
1
|
+
import os, gc, shutil, traceback
|
|
2
|
+
import asyncio, aiofiles, aiofiles.os
|
|
3
3
|
from fastapi import HTTPException
|
|
4
4
|
from fastapi.responses import StreamingResponse
|
|
5
5
|
from langchain_core.documents import Document
|
|
@@ -7,7 +7,13 @@ from ws_bom_robot_app.llm.vector_store.loader.base import Loader
|
|
|
7
7
|
from ws_bom_robot_app.llm.models.api import RulesRequest, KbRequest, VectorDbResponse
|
|
8
8
|
from ws_bom_robot_app.llm.vector_store.integration.manager import IntegrationManager
|
|
9
9
|
from ws_bom_robot_app.llm.utils.faiss_helper import FaissHelper
|
|
10
|
-
from ws_bom_robot_app.
|
|
10
|
+
from ws_bom_robot_app.config import config
|
|
11
|
+
from ws_bom_robot_app.llm.models.kb import load_endpoints
|
|
12
|
+
from ws_bom_robot_app.llm.utils.download import download_files
|
|
13
|
+
|
|
14
|
+
async def _cleanup_directory(directory_path: str):
|
|
15
|
+
if os.path.exists(directory_path):
|
|
16
|
+
await asyncio.to_thread(shutil.rmtree, directory_path)
|
|
11
17
|
|
|
12
18
|
#@timer
|
|
13
19
|
async def rules(rq: RulesRequest) -> VectorDbResponse:
|
|
@@ -17,21 +23,17 @@ async def rules(rq: RulesRequest) -> VectorDbResponse:
|
|
|
17
23
|
store_path = os.path.join(_config.robot_data_folder, _config.robot_data_db_folder, _config.robot_data_db_folder_store, db_name)
|
|
18
24
|
try:
|
|
19
25
|
await FaissHelper.create([Document(page_content=rule, metadata={"source": "rules"}) for rule in rq.rules], store_path, api_key) #type: ignore
|
|
20
|
-
#FAISS.from_texts([json.dumps({"rule": rule}) for rule in rq.rules], OpenAIEmbeddings(api_key=api_key)).save_local(store_path) #type: ignore
|
|
21
26
|
db_file_path = shutil.make_archive(os.path.join(_config.robot_data_folder, _config.robot_data_db_folder, _config.robot_data_db_folder_out, db_name), "zip", store_path)
|
|
22
27
|
return VectorDbResponse(file = os.path.basename(db_file_path))
|
|
23
28
|
except Exception as e:
|
|
24
|
-
|
|
25
|
-
shutil.rmtree(store_path)
|
|
29
|
+
await _cleanup_directory(store_path)
|
|
26
30
|
return VectorDbResponse(success = False, error = str(e))
|
|
31
|
+
finally:
|
|
32
|
+
gc.collect()
|
|
27
33
|
|
|
28
34
|
#@atimer
|
|
29
35
|
async def kb(rq: KbRequest) -> VectorDbResponse:
|
|
30
|
-
import aiofiles
|
|
31
|
-
import aiofiles.os
|
|
32
36
|
os.environ['MPLCONFIGDIR'] = './tmp/.matplotlib'
|
|
33
|
-
from ws_bom_robot_app.llm.models.kb import load_endpoints
|
|
34
|
-
from ws_bom_robot_app.llm.utils.download import download_files
|
|
35
37
|
api_key = rq.api_key()
|
|
36
38
|
_config = rq.config()
|
|
37
39
|
db_name = rq.out_name()
|
|
@@ -44,12 +46,12 @@ async def kb(rq: KbRequest) -> VectorDbResponse:
|
|
|
44
46
|
await aiofiles.os.makedirs(src_path, exist_ok=True)
|
|
45
47
|
await aiofiles.os.makedirs(working_path, exist_ok=True)
|
|
46
48
|
|
|
47
|
-
|
|
48
|
-
filter_file_extensions = loaders.managed_file_extensions()
|
|
49
|
-
|
|
49
|
+
documents: list[Document] = []
|
|
50
50
|
# Download/copy all files
|
|
51
51
|
if rq.files:
|
|
52
52
|
try:
|
|
53
|
+
loaders = Loader(working_path)
|
|
54
|
+
filter_file_extensions = loaders.managed_file_extensions()
|
|
53
55
|
files_to_download = [file for file in rq.files if not os.path.exists(os.path.join(src_path, os.path.basename(file)))]
|
|
54
56
|
if files_to_download:
|
|
55
57
|
await download_files(
|
|
@@ -60,15 +62,21 @@ async def kb(rq: KbRequest) -> VectorDbResponse:
|
|
|
60
62
|
async with aiofiles.open(os.path.join(src_path, os.path.basename(file)), 'rb') as src_file:
|
|
61
63
|
async with aiofiles.open(os.path.join(working_path, os.path.basename(file)), 'wb') as dest_file:
|
|
62
64
|
await dest_file.write(await src_file.read())
|
|
65
|
+
#load files
|
|
66
|
+
try:
|
|
67
|
+
documents.extend(await loaders.load())
|
|
68
|
+
except Exception as e:
|
|
69
|
+
tb = traceback.format_exc()
|
|
70
|
+
return VectorDbResponse(success = False, error = f"File loader failure: {e} | {tb}")
|
|
63
71
|
except Exception as e:
|
|
72
|
+
await _cleanup_directory(working_path)
|
|
64
73
|
return VectorDbResponse(success = False, error = f"Failed to download file {e}")
|
|
65
74
|
|
|
66
|
-
documents: list[Document] = []
|
|
67
|
-
|
|
68
75
|
if rq.endpoints:
|
|
69
76
|
try:
|
|
70
77
|
documents.extend(await load_endpoints(rq.endpoints, working_path))
|
|
71
78
|
except Exception as e:
|
|
79
|
+
await _cleanup_directory(working_path)
|
|
72
80
|
tb = traceback.format_exc()
|
|
73
81
|
return VectorDbResponse(success = False, error = f"Endpoint failure: {e} | {tb}")
|
|
74
82
|
|
|
@@ -85,33 +93,32 @@ async def kb(rq: KbRequest) -> VectorDbResponse:
|
|
|
85
93
|
for docs in integration_documents:
|
|
86
94
|
documents.extend(docs)
|
|
87
95
|
except Exception as e:
|
|
96
|
+
await _cleanup_directory(working_path)
|
|
88
97
|
tb = traceback.format_exc()
|
|
89
98
|
return VectorDbResponse(success=False, error=f"Integration failure: {e} | {tb}")
|
|
90
99
|
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
except Exception as e:
|
|
94
|
-
tb = traceback.format_exc()
|
|
95
|
-
return VectorDbResponse(success = False, error = f"Failed to load files {e} | {tb}")
|
|
96
|
-
finally:
|
|
97
|
-
await aiofiles.os.wrap(shutil.rmtree)(working_path)
|
|
100
|
+
#cleanup
|
|
101
|
+
await _cleanup_directory(working_path)
|
|
98
102
|
|
|
99
103
|
if documents and len(documents) > 0:
|
|
100
104
|
try:
|
|
101
105
|
store_path = os.path.join(_config.robot_data_folder, _config.robot_data_db_folder, _config.robot_data_db_folder_store, db_name)
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
+
db_file_path = await aiofiles.os.wrap(shutil.make_archive)(
|
|
107
|
+
os.path.join(_config.robot_data_folder, _config.robot_data_db_folder, _config.robot_data_db_folder_out, db_name),
|
|
108
|
+
"zip",
|
|
109
|
+
await FaissHelper.create(documents, store_path, api_key, return_folder_path=True)
|
|
110
|
+
)
|
|
106
111
|
return VectorDbResponse(file = os.path.basename(db_file_path))
|
|
107
112
|
except Exception as e:
|
|
113
|
+
await _cleanup_directory(store_path)
|
|
108
114
|
return VectorDbResponse(success = False, error = str(e))
|
|
115
|
+
finally:
|
|
116
|
+
del documents
|
|
117
|
+
gc.collect()
|
|
109
118
|
else:
|
|
110
119
|
return VectorDbResponse(success = False, error = "No documents found in the knowledgebase folder")
|
|
111
120
|
|
|
112
121
|
async def kb_stream_file(filename: str):
|
|
113
|
-
from ws_bom_robot_app.config import config
|
|
114
|
-
|
|
115
122
|
file_path = os.path.join(config.robot_data_folder, config.robot_data_db_folder, config.robot_data_db_folder_out, filename)
|
|
116
123
|
if not os.path.isfile(file_path):
|
|
117
124
|
raise HTTPException(status_code=404, detail="File not found")
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import asyncio
|
|
2
|
-
import os
|
|
3
2
|
from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy, UnstructuredIngest
|
|
4
3
|
from unstructured_ingest.connector.confluence import SimpleConfluenceConfig, ConfluenceAccessConfig
|
|
5
4
|
from unstructured_ingest.runner import ConfluenceRunner
|
|
@@ -42,6 +41,6 @@ class Confluence(IntegrationStrategy):
|
|
|
42
41
|
)
|
|
43
42
|
runner.run()
|
|
44
43
|
async def load(self) -> list[Document]:
|
|
45
|
-
self.run
|
|
46
|
-
await asyncio.sleep(1)
|
|
44
|
+
await asyncio.to_thread(self.run)
|
|
47
45
|
return await self.__loader.load()
|
|
46
|
+
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import asyncio
|
|
2
|
-
import os
|
|
3
2
|
from typing import Optional, Union
|
|
4
3
|
from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy, UnstructuredIngest
|
|
5
4
|
from unstructured_ingest.connector.git import GitAccessConfig
|
|
@@ -43,6 +42,5 @@ class Github(IntegrationStrategy):
|
|
|
43
42
|
)
|
|
44
43
|
runner.run()
|
|
45
44
|
async def load(self) -> list[Document]:
|
|
46
|
-
self.run
|
|
47
|
-
await asyncio.sleep(1)
|
|
45
|
+
await asyncio.to_thread(self.run)
|
|
48
46
|
return await self.__loader.load()
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import asyncio
|
|
2
|
-
import os
|
|
3
2
|
from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy, UnstructuredIngest
|
|
4
3
|
from unstructured_ingest.connector.jira import SimpleJiraConfig, JiraAccessConfig
|
|
5
4
|
from unstructured_ingest.runner import JiraRunner
|
|
@@ -44,6 +43,5 @@ class Jira(IntegrationStrategy):
|
|
|
44
43
|
)
|
|
45
44
|
runner.run()
|
|
46
45
|
async def load(self) -> list[Document]:
|
|
47
|
-
self.run
|
|
48
|
-
await asyncio.sleep(1)
|
|
46
|
+
await asyncio.to_thread(self.run)
|
|
49
47
|
return await self.__loader.load()
|
{ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/llm/vector_store/loader/base.py
RENAMED
|
@@ -9,7 +9,8 @@ from langchain_core.documents import Document
|
|
|
9
9
|
from langchain_unstructured import UnstructuredLoader
|
|
10
10
|
from pydantic import BaseModel
|
|
11
11
|
from ws_bom_robot_app.llm.vector_store.loader.json_loader import JsonLoader
|
|
12
|
-
|
|
12
|
+
import gc, logging
|
|
13
|
+
|
|
13
14
|
class LoaderConfig(BaseModel):
|
|
14
15
|
loader: type[BaseLoader]
|
|
15
16
|
kwargs: Optional[dict[str, Any]] = {}
|
|
@@ -108,6 +109,18 @@ class Loader():
|
|
|
108
109
|
|
|
109
110
|
#@timer
|
|
110
111
|
async def load(self) -> list[Document]:
|
|
112
|
+
MAX_RETRIES = 3
|
|
111
113
|
loaders = MergedDataLoader(self.__directory_loader())
|
|
112
|
-
|
|
113
|
-
|
|
114
|
+
try:
|
|
115
|
+
for attempt in range(MAX_RETRIES):
|
|
116
|
+
try:
|
|
117
|
+
return await loaders.aload()
|
|
118
|
+
#return await [doc async for doc in loaders.alazy_load()]
|
|
119
|
+
except Exception as e:
|
|
120
|
+
logging.warning(f"Attempt {attempt+1} load document failed: {e}")
|
|
121
|
+
await asyncio.sleep(1)
|
|
122
|
+
if attempt == MAX_RETRIES - 1:
|
|
123
|
+
logging.error(f"Failed to load documents: {e}")
|
|
124
|
+
return []
|
|
125
|
+
finally:
|
|
126
|
+
gc.collect()
|
|
@@ -22,6 +22,10 @@ python-magic==0.4.27
|
|
|
22
22
|
opencv-python-headless==4.10.0.84 #docker specs
|
|
23
23
|
unstructured[all-docs]==0.15.14
|
|
24
24
|
langchain_unstructured==0.1.5
|
|
25
|
+
unstructured-ingest==0.3.8
|
|
26
|
+
unstructured-ingest[confluence]
|
|
27
|
+
unstructured-ingest[github]
|
|
28
|
+
unstructured-ingest[jira]
|
|
25
29
|
html5lib==1.1 #beautifulsoup4 parser
|
|
26
30
|
|
|
27
31
|
#integrations
|
|
@@ -4,7 +4,7 @@ from math import floor
|
|
|
4
4
|
import asyncio, os
|
|
5
5
|
from datetime import datetime, timedelta
|
|
6
6
|
from enum import Enum
|
|
7
|
-
from typing import Annotated, Coroutine, Literal, TypeVar, Optional, Dict, Union, Any
|
|
7
|
+
from typing import Annotated, Coroutine, Literal, TypeVar, Optional, Dict, Union, Any, Callable
|
|
8
8
|
from pydantic import BaseModel, ConfigDict, Field, computed_field
|
|
9
9
|
from uuid import uuid4
|
|
10
10
|
from fastapi import APIRouter, HTTPException
|
|
@@ -171,7 +171,7 @@ class TaskManagerStrategy(ABC):
|
|
|
171
171
|
def task_cleanup_rule(self, task: TaskEntry) -> bool:
|
|
172
172
|
return task.status.status in {"completed", "failure"} and datetime.fromisoformat(task.status.metadata.end_at) < datetime.now() - timedelta(days=config.robot_task_retention_days)
|
|
173
173
|
|
|
174
|
-
def task_done_callback(self, task_entry: TaskEntry, headers: TaskHeader | None = None) ->
|
|
174
|
+
def task_done_callback(self, task_entry: TaskEntry, headers: TaskHeader | None = None) -> Callable:
|
|
175
175
|
def callback(task: asyncio.Task):
|
|
176
176
|
try:
|
|
177
177
|
result = task_entry.task.result()
|
|
@@ -188,9 +188,9 @@ class TaskManagerStrategy(ABC):
|
|
|
188
188
|
del self.running_tasks[task_entry.id]
|
|
189
189
|
#notify webhooks
|
|
190
190
|
if headers and headers.x_ws_bom_webhooks:
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
191
|
+
asyncio.create_task(
|
|
192
|
+
WebhookNotifier().notify_webhook(task_entry.status,headers.x_ws_bom_webhooks)
|
|
193
|
+
)
|
|
194
194
|
return callback
|
|
195
195
|
|
|
196
196
|
def create_task_entry(self, coroutine: asyncio.coroutines, headers: TaskHeader | None = None) -> TaskEntry:
|
|
@@ -76,6 +76,32 @@ def timer(func):
|
|
|
76
76
|
def atimer(func):
|
|
77
77
|
return _get_timer_wrapper(is_async=True)(func)
|
|
78
78
|
|
|
79
|
+
#profiler
|
|
80
|
+
def memory_leak_detector(func):
|
|
81
|
+
import tracemalloc, gc, sys
|
|
82
|
+
async def wrapper(*args, **kwargs):
|
|
83
|
+
# start tracking
|
|
84
|
+
tracemalloc.start()
|
|
85
|
+
initial_snapshot = tracemalloc.take_snapshot()
|
|
86
|
+
# run
|
|
87
|
+
result = await func(*args, **kwargs)
|
|
88
|
+
# take final snapshot
|
|
89
|
+
final_snapshot = tracemalloc.take_snapshot()
|
|
90
|
+
# compare snapshots
|
|
91
|
+
top_stats = final_snapshot.compare_to(initial_snapshot, 'lineno')
|
|
92
|
+
print(f"\nMemory Leak Analysis for {func.__name__}:")
|
|
93
|
+
for stat in top_stats[:10]:
|
|
94
|
+
print(stat)
|
|
95
|
+
# uncollectable objects
|
|
96
|
+
print("\n[ Uncollectable Objects ]")
|
|
97
|
+
print(gc.garbage)
|
|
98
|
+
print("\nGarbage Collector Stats:")
|
|
99
|
+
print(gc.get_stats())
|
|
100
|
+
# stop tracking
|
|
101
|
+
tracemalloc.stop()
|
|
102
|
+
return result
|
|
103
|
+
return wrapper
|
|
104
|
+
|
|
79
105
|
|
|
80
106
|
|
|
81
107
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: ws_bom_robot_app
|
|
3
|
-
Version: 0.0.17
|
|
3
|
+
Version: 0.0.19
|
|
4
4
|
Summary: A FastAPI application serving ws bom/robot/llm platform ai.
|
|
5
5
|
Home-page: https://github.com/websolutespa/bom
|
|
6
6
|
Author: Websolute Spa
|
|
@@ -25,6 +25,10 @@ Requires-Dist: python-magic==0.4.27
|
|
|
25
25
|
Requires-Dist: opencv-python-headless==4.10.0.84
|
|
26
26
|
Requires-Dist: unstructured[all-docs]==0.15.14
|
|
27
27
|
Requires-Dist: langchain_unstructured==0.1.5
|
|
28
|
+
Requires-Dist: unstructured-ingest==0.3.8
|
|
29
|
+
Requires-Dist: unstructured-ingest[confluence]
|
|
30
|
+
Requires-Dist: unstructured-ingest[github]
|
|
31
|
+
Requires-Dist: unstructured-ingest[jira]
|
|
28
32
|
Requires-Dist: html5lib==1.1
|
|
29
33
|
Requires-Dist: markdownify==0.14.1
|
|
30
34
|
Requires-Dist: nebuly==0.3.33
|
|
@@ -13,6 +13,10 @@ python-magic==0.4.27
|
|
|
13
13
|
opencv-python-headless==4.10.0.84
|
|
14
14
|
unstructured[all-docs]==0.15.14
|
|
15
15
|
langchain_unstructured==0.1.5
|
|
16
|
+
unstructured-ingest==0.3.8
|
|
17
|
+
unstructured-ingest[confluence]
|
|
18
|
+
unstructured-ingest[github]
|
|
19
|
+
unstructured-ingest[jira]
|
|
16
20
|
html5lib==1.1
|
|
17
21
|
markdownify==0.14.1
|
|
18
22
|
nebuly==0.3.33
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/llm/agent_description.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/llm/tools/models/__init__.py
RENAMED
|
File without changes
|
{ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/llm/tools/models/main.py
RENAMED
|
File without changes
|
{ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/llm/tools/tool_builder.py
RENAMED
|
File without changes
|
{ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/llm/tools/tool_manager.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/llm/utils/agent_utils.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app/llm/vector_store/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ws_bom_robot_app-0.0.17 → ws_bom_robot_app-0.0.19}/ws_bom_robot_app.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|