ws-bom-robot-app 0.0.33__py3-none-any.whl → 0.0.35__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ws_bom_robot_app/config.py +10 -1
- ws_bom_robot_app/llm/agent_description.py +123 -124
- ws_bom_robot_app/llm/agent_handler.py +180 -167
- ws_bom_robot_app/llm/agent_lcel.py +54 -64
- ws_bom_robot_app/llm/api.py +33 -21
- ws_bom_robot_app/llm/defaut_prompt.py +15 -9
- ws_bom_robot_app/llm/main.py +109 -102
- ws_bom_robot_app/llm/models/api.py +55 -7
- ws_bom_robot_app/llm/models/kb.py +11 -2
- ws_bom_robot_app/llm/providers/__init__.py +0 -0
- ws_bom_robot_app/llm/providers/llm_manager.py +174 -0
- ws_bom_robot_app/llm/settings.py +4 -4
- ws_bom_robot_app/llm/tools/models/main.py +5 -3
- ws_bom_robot_app/llm/tools/tool_builder.py +23 -19
- ws_bom_robot_app/llm/tools/tool_manager.py +133 -101
- ws_bom_robot_app/llm/tools/utils.py +25 -25
- ws_bom_robot_app/llm/utils/agent_utils.py +17 -16
- ws_bom_robot_app/llm/utils/download.py +79 -79
- ws_bom_robot_app/llm/utils/print.py +29 -29
- ws_bom_robot_app/llm/utils/secrets.py +26 -0
- ws_bom_robot_app/llm/vector_store/generator.py +137 -137
- ws_bom_robot_app/llm/vector_store/integration/base.py +12 -1
- ws_bom_robot_app/llm/vector_store/loader/base.py +6 -5
- ws_bom_robot_app/llm/vector_store/loader/docling.py +27 -6
- ws_bom_robot_app/llm/vector_store/loader/json_loader.py +25 -25
- ws_bom_robot_app/main.py +7 -2
- {ws_bom_robot_app-0.0.33.dist-info → ws_bom_robot_app-0.0.35.dist-info}/METADATA +25 -12
- {ws_bom_robot_app-0.0.33.dist-info → ws_bom_robot_app-0.0.35.dist-info}/RECORD +30 -28
- ws_bom_robot_app/llm/utils/faiss_helper.py +0 -127
- {ws_bom_robot_app-0.0.33.dist-info → ws_bom_robot_app-0.0.35.dist-info}/WHEEL +0 -0
- {ws_bom_robot_app-0.0.33.dist-info → ws_bom_robot_app-0.0.35.dist-info}/top_level.txt +0 -0
|
@@ -1,25 +1,25 @@
|
|
|
1
|
-
import json
|
|
2
|
-
from typing import Optional
|
|
3
|
-
from langchain_core.documents import Document
|
|
4
|
-
from langchain_community.document_loaders.base import BaseLoader
|
|
5
|
-
|
|
6
|
-
class JsonLoader(BaseLoader):
    """Document loader that turns the items of a JSON file into ``Document`` objects.

    A JSON array produces one document per element; any other root value is
    wrapped so that it produces exactly one document.
    """

    def __init__(self, file_path: str, meta_fields:Optional[list[str]] = [],encoding: Optional[str] = "utf-8"):
        """Remember the file location, the metadata field names and the text encoding."""
        self.file_path, self.meta_fields, self.encoding = file_path, meta_fields, encoding

    def load(self) -> list[Document]:
        """Parse the JSON file and build the documents.

        Each document's content is the item re-serialized with ``json.dumps``.
        Its metadata always carries ``"source"`` (the file path) plus every
        configured meta field whose value in the item is truthy.
        """
        with open(self.file_path, "r", encoding=self.encoding) as handle:
            payload = json.load(handle)
        if not isinstance(payload, list):
            payload = [payload]

        documents = []
        for entry in payload:
            metadata = {"source": self.file_path}
            for name in self.meta_fields:
                value = entry.get(name)
                # Missing keys and falsy values are left out of the metadata.
                if value:
                    metadata[name] = value
            documents.append(Document(page_content=json.dumps(entry), metadata=metadata))
        return documents
|
|
1
|
+
import json
|
|
2
|
+
from typing import Optional
|
|
3
|
+
from langchain_core.documents import Document
|
|
4
|
+
from langchain_community.document_loaders.base import BaseLoader
|
|
5
|
+
|
|
6
|
+
class JsonLoader(BaseLoader):
    """Load a JSON file and expose each top-level item as a ``Document``.

    A file whose root is a JSON array yields one document per element; any
    other root value yields a single document.
    """

    def __init__(self, file_path: str, meta_fields: Optional[list[str]] = None, encoding: Optional[str] = "utf-8"):
        """Configure the loader.

        Args:
            file_path: Path of the JSON file to read.
            meta_fields: Names of item keys to copy into each document's
                metadata. ``None`` (the default) means no extra fields.
            encoding: Text encoding used to open the file.
        """
        self.file_path = file_path
        # A ``[]`` default would be one list object shared by every instance;
        # create a fresh list per instance instead.
        self.meta_fields = meta_fields if meta_fields is not None else []
        self.encoding = encoding

    def load(self) -> list[Document]:
        """Read the file and return one ``Document`` per top-level item.

        Returns:
            Documents whose ``page_content`` is the item serialized with
            ``json.dumps`` and whose metadata holds ``"source"`` (the file
            path) plus every configured meta field that has a truthy value
            in the item.
        """
        with open(self.file_path, "r", encoding=self.encoding) as file:
            data = json.load(file)
        items = data if isinstance(data, list) else [data]

        documents = []
        for item in items:
            metadata = {"source": self.file_path}
            # Only JSON objects have fields to look up; scalars and nested
            # arrays still become documents, just without extra metadata.
            if isinstance(item, dict):
                for field in self.meta_fields:
                    value = item.get(field)
                    # Falsy values (missing, null, "", 0, false) are skipped,
                    # matching the loader's existing behaviour.
                    if value:
                        metadata[field] = value
            documents.append(Document(page_content=json.dumps(item), metadata=metadata))
        return documents
|
ws_bom_robot_app/main.py
CHANGED
|
@@ -71,7 +71,9 @@ def __get_disk_info():
|
|
|
71
71
|
@app.get("/api/diag",tags=["diag"])
|
|
72
72
|
def diag(authenticate: bool = Depends(authenticate)):
|
|
73
73
|
import pkg_resources, psutil
|
|
74
|
-
from ws_bom_robot_app.llm.
|
|
74
|
+
from ws_bom_robot_app.llm.providers.llm_manager import LlmManager as wsllm
|
|
75
|
+
from ws_bom_robot_app.llm.vector_store.db.manager import VectorDbManager as wsdb
|
|
76
|
+
from ws_bom_robot_app.llm.vector_store.loader.base import Loader as wsldr
|
|
75
77
|
from ws_bom_robot_app.llm.vector_store.integration.manager import IntegrationManager as wsim
|
|
76
78
|
from ws_bom_robot_app.llm.tools.tool_manager import ToolManager as wstm
|
|
77
79
|
from ws_bom_robot_app.llm.agent_description import AgentDescriptor as wsad
|
|
@@ -90,6 +92,7 @@ def diag(authenticate: bool = Depends(authenticate)):
|
|
|
90
92
|
"platform": {
|
|
91
93
|
"node": platform.node(),
|
|
92
94
|
"system": platform.system(),
|
|
95
|
+
"platform": platform.platform(),
|
|
93
96
|
"version": platform.version(),
|
|
94
97
|
"type": platform.machine(),
|
|
95
98
|
"processor": platform.processor(),
|
|
@@ -132,7 +135,9 @@ def diag(authenticate: bool = Depends(authenticate)):
|
|
|
132
135
|
"config":config,
|
|
133
136
|
"runtime":config.runtime_options(),
|
|
134
137
|
"extension": {
|
|
135
|
-
"
|
|
138
|
+
"provider":({item[0]: type(item[1]).__name__} for item in wsllm._list.items()),
|
|
139
|
+
"db":({item[0]: type(item[1]).__name__} for item in wsdb._list.items()),
|
|
140
|
+
"loader": ({item[0]: item[1].loader.__name__ if item[1] else None} for item in sorted(wsldr._list.items(), key=lambda x: x[0]) if item[1]),
|
|
136
141
|
"integration":({item[0]: type(item[1]).__name__} for item in wsim._list.items()),
|
|
137
142
|
"tool": ({item[0]: item[1].function.__name__} for item in wstm._list.items()),
|
|
138
143
|
"agent":({item[0]: type(item[1]).__name__} for item in wsad._list.items())
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: ws_bom_robot_app
|
|
3
|
-
Version: 0.0.
|
|
3
|
+
Version: 0.0.35
|
|
4
4
|
Summary: A FastAPI application serving ws bom/robot/llm platform ai.
|
|
5
5
|
Home-page: https://github.com/websolutespa/bom
|
|
6
6
|
Author: Websolute Spa
|
|
@@ -13,17 +13,25 @@ Description-Content-Type: text/markdown
|
|
|
13
13
|
Requires-Dist: standardwebhooks==1.0.0
|
|
14
14
|
Requires-Dist: apscheduler==3.11.0
|
|
15
15
|
Requires-Dist: aiofiles==24.1.0
|
|
16
|
-
Requires-Dist: pydantic==2.10.
|
|
16
|
+
Requires-Dist: pydantic==2.10.6
|
|
17
17
|
Requires-Dist: pydantic-settings==2.7.1
|
|
18
|
-
Requires-Dist: fastapi[standard]==0.115.
|
|
19
|
-
Requires-Dist: langchain==0.3.
|
|
20
|
-
Requires-Dist: langchain-
|
|
21
|
-
Requires-Dist: langchain-
|
|
22
|
-
Requires-Dist: langchain-
|
|
18
|
+
Requires-Dist: fastapi[standard]==0.115.8
|
|
19
|
+
Requires-Dist: langchain==0.3.18
|
|
20
|
+
Requires-Dist: langchain-community==0.3.17
|
|
21
|
+
Requires-Dist: langchain-core==0.3.34
|
|
22
|
+
Requires-Dist: langchain-openai==0.3.5
|
|
23
|
+
Requires-Dist: langchain-anthropic==0.3.6
|
|
24
|
+
Requires-Dist: langchain-google-genai==2.0.7
|
|
25
|
+
Requires-Dist: langchain-google-vertexai==2.0.13
|
|
23
26
|
Requires-Dist: faiss-cpu==1.9.0
|
|
24
|
-
Requires-Dist:
|
|
27
|
+
Requires-Dist: chromadb==0.6.3
|
|
28
|
+
Requires-Dist: langchain_chroma==0.2.1
|
|
29
|
+
Requires-Dist: fastembed==0.5.1
|
|
30
|
+
Requires-Dist: langchain-qdrant==0.2.0
|
|
31
|
+
Requires-Dist: lark==1.2.2
|
|
32
|
+
Requires-Dist: unstructured==0.16.17
|
|
25
33
|
Requires-Dist: unstructured[image]
|
|
26
|
-
Requires-Dist: unstructured-ingest==0.
|
|
34
|
+
Requires-Dist: unstructured-ingest==0.4.6
|
|
27
35
|
Requires-Dist: unstructured-ingest[azure]
|
|
28
36
|
Requires-Dist: unstructured-ingest[confluence]
|
|
29
37
|
Requires-Dist: unstructured-ingest[dropbox]
|
|
@@ -185,6 +193,7 @@ py -m pip install --upgrade setuptools build twine streamlit
|
|
|
185
193
|
### 🪛 build
|
|
186
194
|
|
|
187
195
|
```pwsh
|
|
196
|
+
if (Test-Path ./dist) {rm ./dist -r -force}; `
|
|
188
197
|
py -m build && twine check dist/*
|
|
189
198
|
```
|
|
190
199
|
|
|
@@ -217,14 +226,18 @@ prospector ./ws_bom_robot_app -t pyroma
|
|
|
217
226
|
launch pytest
|
|
218
227
|
|
|
219
228
|
```pwsh
|
|
220
|
-
!py -m pip install -U pytest pytest-asyncio pytest-mock pytest-cov
|
|
229
|
+
!py -m pip install -U pytest pytest-asyncio pytest-mock pytest-cov pyclean
|
|
230
|
+
# clean cache if needed
|
|
231
|
+
# pyclean --verbose .
|
|
221
232
|
pytest --cov=ws_bom_robot_app --log-cli-level=info
|
|
233
|
+
# directory
|
|
234
|
+
# pytest --cov=ws_bom_robot_app --log-cli-level=info ./tests/app/llm/vector_store/db
|
|
222
235
|
```
|
|
223
236
|
|
|
224
237
|
launch debugger
|
|
225
238
|
|
|
226
239
|
```pwsh
|
|
227
|
-
streamlit run debugger.py --server.port
|
|
240
|
+
streamlit run debugger.py --server.port 6011
|
|
228
241
|
```
|
|
229
242
|
|
|
230
243
|
dockerize base image
|
|
@@ -242,7 +255,7 @@ dockerize app from src
|
|
|
242
255
|
|
|
243
256
|
```pwsh
|
|
244
257
|
docker build -f Dockerfile-src -t ws-bom-robot-app:src .
|
|
245
|
-
docker run --name ws-bom-robot-app-src -d -v "$(pwd)/ws_bom_robot_app:/app/ws_bom_robot_app" -v "$(pwd)/.data:/app/.data" -v "$(pwd)/tests:/app/tests" -v "$(pwd)/tmp:/tmp" -p
|
|
258
|
+
docker run --name ws-bom-robot-app-src -d -v "$(pwd)/ws_bom_robot_app:/app/ws_bom_robot_app" -v "$(pwd)/.data:/app/.data" -v "$(pwd)/tests:/app/tests" -v "$(pwd)/tmp:/tmp" -p 6002:6001 ws-bom-robot-app:src
|
|
246
259
|
```
|
|
247
260
|
|
|
248
261
|
### ✈️ publish
|
|
@@ -1,41 +1,43 @@
|
|
|
1
1
|
ws_bom_robot_app/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
2
|
ws_bom_robot_app/auth.py,sha256=84nIbmJsMrNs0sxIQGEHbjsjc2P6ZrZZGSn8dkiL6is,895
|
|
3
|
-
ws_bom_robot_app/config.py,sha256=
|
|
3
|
+
ws_bom_robot_app/config.py,sha256=zjqr_tx8tkxZ94YXrZ-xQFZiM5es3mcQlZG1VEnj0ac,3635
|
|
4
4
|
ws_bom_robot_app/cron_manager.py,sha256=0Yt5AMTPGlXZ_M5ck0SKMX8wvzoPsseEezg_s0Q3HKY,9224
|
|
5
|
-
ws_bom_robot_app/main.py,sha256=
|
|
5
|
+
ws_bom_robot_app/main.py,sha256=zO3B-v-v9ESASvw8IaQj9Y9hNvNmOxohFmA0R82EybQ,6518
|
|
6
6
|
ws_bom_robot_app/task_manager.py,sha256=Zedzs2R3O-wNSQOqs4jorgFwPRi-ji_0TN4mGfk-VvE,15958
|
|
7
7
|
ws_bom_robot_app/util.py,sha256=b49ItlZgh2Wzw-6K8k5Wa44eVgjQ0JmWQwJnEaQBVGw,3502
|
|
8
8
|
ws_bom_robot_app/llm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
-
ws_bom_robot_app/llm/agent_description.py,sha256=
|
|
10
|
-
ws_bom_robot_app/llm/agent_handler.py,sha256=
|
|
11
|
-
ws_bom_robot_app/llm/agent_lcel.py,sha256=
|
|
12
|
-
ws_bom_robot_app/llm/api.py,sha256=
|
|
13
|
-
ws_bom_robot_app/llm/defaut_prompt.py,sha256=
|
|
14
|
-
ws_bom_robot_app/llm/main.py,sha256=
|
|
15
|
-
ws_bom_robot_app/llm/settings.py,sha256=
|
|
9
|
+
ws_bom_robot_app/llm/agent_description.py,sha256=XLrDcUk-4OtBFPv1Yscxga3ETHNnYCIC1uxHtWD94B8,4663
|
|
10
|
+
ws_bom_robot_app/llm/agent_handler.py,sha256=4zdpSf5iVLxMZ90c_vUl_k-O9SF6u_h7GOB24y4mhIo,6435
|
|
11
|
+
ws_bom_robot_app/llm/agent_lcel.py,sha256=BUfGVUcw6s_YAu5aPMkqqjsStYNHUXKh31t_Ybx11-A,2395
|
|
12
|
+
ws_bom_robot_app/llm/api.py,sha256=UaD1oJyAOe7ASoXxPNJcth3kDuWcjk1xqUNEjuPWbR4,3759
|
|
13
|
+
ws_bom_robot_app/llm/defaut_prompt.py,sha256=LlCd_nSMkMmHESfiiiQYfnJyB6Pp-LSs4CEKdYW4vFk,1106
|
|
14
|
+
ws_bom_robot_app/llm/main.py,sha256=Qw4SbZtstXo31bWFLRQmm_t8BEKlqvgXmtYbxM2F6_Y,4138
|
|
15
|
+
ws_bom_robot_app/llm/settings.py,sha256=EkFGCppORenStH9W4e6_dYvQ-5p6xiEMpmUHBqNqG9M,117
|
|
16
16
|
ws_bom_robot_app/llm/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
17
|
-
ws_bom_robot_app/llm/models/api.py,sha256=
|
|
17
|
+
ws_bom_robot_app/llm/models/api.py,sha256=mLbPG7jHh1EjgQG-xpBhEgiTIHpK35HZ51obgqQSfq4,8890
|
|
18
18
|
ws_bom_robot_app/llm/models/base.py,sha256=1TqxuTK3rjJEALn7lvgoen_1ba3R2brAgGx6EDTtDZo,152
|
|
19
|
-
ws_bom_robot_app/llm/models/kb.py,sha256=
|
|
19
|
+
ws_bom_robot_app/llm/models/kb.py,sha256=oVSw6_dmNxikAHrPqcfxDXz9M0ezLIYuxpgvzfs_Now,9514
|
|
20
|
+
ws_bom_robot_app/llm/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
21
|
+
ws_bom_robot_app/llm/providers/llm_manager.py,sha256=ONPeIkMracB3R4UScjgb1s5GZm29kAWOIzT0T8cjvvo,6179
|
|
20
22
|
ws_bom_robot_app/llm/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
21
|
-
ws_bom_robot_app/llm/tools/tool_builder.py,sha256=
|
|
22
|
-
ws_bom_robot_app/llm/tools/tool_manager.py,sha256
|
|
23
|
-
ws_bom_robot_app/llm/tools/utils.py,sha256=
|
|
23
|
+
ws_bom_robot_app/llm/tools/tool_builder.py,sha256=OaA0jReNUpjfe7c8TVLM86acQ4w0cQaR3NE22hGKJb0,1165
|
|
24
|
+
ws_bom_robot_app/llm/tools/tool_manager.py,sha256=RZcJVPyWT9D3HUxSO1d5kSfTQtJB2CG5hocuFa01AzY,5816
|
|
25
|
+
ws_bom_robot_app/llm/tools/utils.py,sha256=SPC8pj2bt_xWO7wNR_5YBwUUvjJIK1xlavR4yfW4J-0,1320
|
|
24
26
|
ws_bom_robot_app/llm/tools/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
25
|
-
ws_bom_robot_app/llm/tools/models/main.py,sha256=
|
|
27
|
+
ws_bom_robot_app/llm/tools/models/main.py,sha256=o3Rwbn5nsugKOgLG0FUIuvtPPHYhfVpqG4E3BQB2nWM,388
|
|
26
28
|
ws_bom_robot_app/llm/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
27
|
-
ws_bom_robot_app/llm/utils/agent_utils.py,sha256=
|
|
29
|
+
ws_bom_robot_app/llm/utils/agent_utils.py,sha256=9fdnVMHpSEBdmYef6NFXtMIXa8EB4QQICfXsXDiafHg,923
|
|
28
30
|
ws_bom_robot_app/llm/utils/chunker.py,sha256=N7570xBYlObneg-fsvDhPAJ-Pv8C8OaYZOBK6q7LmMI,607
|
|
29
|
-
ws_bom_robot_app/llm/utils/download.py,sha256=
|
|
30
|
-
ws_bom_robot_app/llm/utils/faiss_helper.py,sha256=VikpopCpEzV1lN5JISDabpHcIUkNDACNL52KliB4Hxs,5224
|
|
31
|
+
ws_bom_robot_app/llm/utils/download.py,sha256=iAUxH_NiCpTPtGzhC4hBtxotd2HPFt2MBhttslIxqiI,3194
|
|
31
32
|
ws_bom_robot_app/llm/utils/kb.py,sha256=jja45WCbNI7SGEgqDS99nErlwB5eY8Ga7BMnhdMHZ90,1279
|
|
32
|
-
ws_bom_robot_app/llm/utils/print.py,sha256=
|
|
33
|
+
ws_bom_robot_app/llm/utils/print.py,sha256=ZonoLPcfM6Cpw4_Ec455LiCovExOwvnIgvw1QORSCBY,799
|
|
34
|
+
ws_bom_robot_app/llm/utils/secrets.py,sha256=-HtqLIDVIJrpvGC5YhPAVyLsq8P4ChVM5g3GOfdwqVk,878
|
|
33
35
|
ws_bom_robot_app/llm/utils/webhooks.py,sha256=LAAZqyN6VhV13wu4X-X85TwdDgAV2rNvIwQFIIc0FJM,2114
|
|
34
36
|
ws_bom_robot_app/llm/vector_store/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
35
|
-
ws_bom_robot_app/llm/vector_store/generator.py,sha256=
|
|
37
|
+
ws_bom_robot_app/llm/vector_store/generator.py,sha256=9_xdtCKJhmt1OP0GXDjvFERXMP7ozLZT92KuYEBDgC0,6314
|
|
36
38
|
ws_bom_robot_app/llm/vector_store/integration/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
37
39
|
ws_bom_robot_app/llm/vector_store/integration/azure.py,sha256=R37TaPQP-HJJJiaKE9rmMc9kpeXeRvdebbTY_982om0,3392
|
|
38
|
-
ws_bom_robot_app/llm/vector_store/integration/base.py,sha256=
|
|
40
|
+
ws_bom_robot_app/llm/vector_store/integration/base.py,sha256=8r6XO_XM8PcDXhsKst6q_Xw-P48rCiEtowmkBEDVd08,1957
|
|
39
41
|
ws_bom_robot_app/llm/vector_store/integration/confluence.py,sha256=4fiRHB3J-SHZZxNGHwVkCrT-xSPbc91z4WrDE9fy6xU,2505
|
|
40
42
|
ws_bom_robot_app/llm/vector_store/integration/dropbox.py,sha256=yhGvHTN0TEpUfhdvvV7RX5MxBwTUyddAX95Fgqp3mCg,2629
|
|
41
43
|
ws_bom_robot_app/llm/vector_store/integration/gcs.py,sha256=fFDVDUR6eNB7FVTzDSEpMHFEWMgG16GLnpSf_mqGDdE,3184
|
|
@@ -49,10 +51,10 @@ ws_bom_robot_app/llm/vector_store/integration/sharepoint.py,sha256=zqqn-6qPrK50P
|
|
|
49
51
|
ws_bom_robot_app/llm/vector_store/integration/sitemap.py,sha256=4WYj3C6Y_4vkGs5iUNR59l1YOZEDsQT8MnZ5rIYDL_k,4733
|
|
50
52
|
ws_bom_robot_app/llm/vector_store/integration/slack.py,sha256=FMjESXm2QetFXI6i8epze7Kbbu22fV8CVaxb71AHnJ8,2572
|
|
51
53
|
ws_bom_robot_app/llm/vector_store/loader/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
52
|
-
ws_bom_robot_app/llm/vector_store/loader/base.py,sha256=
|
|
53
|
-
ws_bom_robot_app/llm/vector_store/loader/docling.py,sha256=
|
|
54
|
-
ws_bom_robot_app/llm/vector_store/loader/json_loader.py,sha256=
|
|
55
|
-
ws_bom_robot_app-0.0.
|
|
56
|
-
ws_bom_robot_app-0.0.
|
|
57
|
-
ws_bom_robot_app-0.0.
|
|
58
|
-
ws_bom_robot_app-0.0.
|
|
54
|
+
ws_bom_robot_app/llm/vector_store/loader/base.py,sha256=L_ugekNuAq0N9O-24wtlHSNHkqSeD-KsJrfGt_FX9Oc,5340
|
|
55
|
+
ws_bom_robot_app/llm/vector_store/loader/docling.py,sha256=yP0zgXLeFAlByaYuj-6cYariuknckrFds0dxdRcnVz8,3456
|
|
56
|
+
ws_bom_robot_app/llm/vector_store/loader/json_loader.py,sha256=qo9ejRZyKv_k6jnGgXnu1W5uqsMMtgqK_uvPpZQ0p74,833
|
|
57
|
+
ws_bom_robot_app-0.0.35.dist-info/METADATA,sha256=gwxWX98wtRsmod8DpysLX38QBX_Rl0MHdyCnclliUTs,8270
|
|
58
|
+
ws_bom_robot_app-0.0.35.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
59
|
+
ws_bom_robot_app-0.0.35.dist-info/top_level.txt,sha256=Yl0akyHVbynsBX_N7wx3H3ZTkcMLjYyLJs5zBMDAKcM,17
|
|
60
|
+
ws_bom_robot_app-0.0.35.dist-info/RECORD,,
|
|
@@ -1,127 +0,0 @@
|
|
|
1
|
-
from langchain_community.vectorstores.faiss import FAISS
|
|
2
|
-
from langchain_core.documents import Document
|
|
3
|
-
from langchain_core.vectorstores.base import VectorStoreRetriever
|
|
4
|
-
from langchain_openai import OpenAIEmbeddings
|
|
5
|
-
from typing import Any
|
|
6
|
-
import asyncio, gc, logging
|
|
7
|
-
from langchain_text_splitters import CharacterTextSplitter
|
|
8
|
-
from pydantic import SecretStr
|
|
9
|
-
|
|
10
|
-
from ws_bom_robot_app.llm.utils.chunker import DocumentChunker
|
|
11
|
-
|
|
12
|
-
class FaissHelper():
    """Static helpers to build, load and query FAISS indexes stored on disk.

    Embeddings are computed with the OpenAI model named in
    ``_embedding_model``. Loaded indexes are kept in ``_CACHE``, keyed by
    folder path.
    """
    _embedding_model = "text-embedding-3-small"
    # Loaded indexes keyed by folder path; entries are dropped when the index
    # for that folder is rebuilt by ``create``.
    _CACHE: dict[str, FAISS] = {}

    @staticmethod
    async def create(documents: list[Document], folder_path: str, api_key: SecretStr, return_folder_path:bool = False) -> str | None:
        """Chunk and embed ``documents``, then save the index to ``folder_path``.

        Args:
            documents: Source documents; they are split with ``DocumentChunker``.
            folder_path: Directory the index is written to.
            api_key: OpenAI API key used for the embedding calls.
            return_folder_path: When true, return ``folder_path`` on success.

        Returns:
            ``folder_path`` if ``return_folder_path`` is true and the index was
            saved; otherwise ``None``. Failures are logged and also yield
            ``None`` (best-effort contract: this method does not raise).
        """
        try:
            embeddings = OpenAIEmbeddings(api_key=api_key, model=FaissHelper._embedding_model)
            # Building and saving are blocking calls; run them off the event loop.
            faiss_instance = await asyncio.to_thread(
                FAISS.from_documents,
                DocumentChunker.chunk(documents),
                embeddings)
            await asyncio.to_thread(faiss_instance.save_local, folder_path)
            # A previously loaded index for this folder is now outdated.
            FaissHelper._CACHE.pop(folder_path, None)
            del faiss_instance, embeddings
            return folder_path if return_folder_path else None
        except Exception as e:
            logging.error(f"Failed to create Faiss instance: {e}")
            return None
        finally:
            # Release the index/embedding buffers promptly on every path.
            gc.collect()

    @staticmethod
    def get_loader(folder_path:str,api_key:SecretStr) -> FAISS:
        """Return the FAISS index stored in ``folder_path``, loading it on first use.

        Args:
            folder_path: Directory containing a saved index.
            api_key: OpenAI API key for the embedding model bound to the index.

        Returns:
            The cached (or freshly loaded) ``FAISS`` instance.
        """
        cached = FaissHelper._CACHE.get(folder_path)
        if cached is None:
            # NOTE(security): allow_dangerous_deserialization lets load_local
            # unpickle data from disk; only point this at folders written by
            # this application, never at untrusted uploads.
            cached = FAISS.load_local(
                folder_path=folder_path,
                embeddings=OpenAIEmbeddings(api_key=api_key, model=FaissHelper._embedding_model),
                allow_dangerous_deserialization=True
            )
            FaissHelper._CACHE[folder_path] = cached
        return cached

    @staticmethod
    def get_retriever(folder_path:str,api_key:SecretStr,search_type: str = "similarity", search_kwargs: dict[str,Any] | None = None) -> VectorStoreRetriever:
        """Build a retriever over the index stored in ``folder_path``.

        Args:
            folder_path: Directory containing a saved index.
            api_key: OpenAI API key for the embedding model.
            search_type: Retriever search type forwarded to ``as_retriever``.
            search_kwargs: Search options forwarded to ``as_retriever``;
                ``None`` means no options.

        Returns:
            A ``VectorStoreRetriever`` bound to the cached index.
        """
        _faiss = FaissHelper.get_loader(folder_path,api_key)
        # Pass a private copy so the retriever never aliases the caller's dict.
        return _faiss.as_retriever(search_type=search_type, search_kwargs=dict(search_kwargs or {}))

    @staticmethod
    async def _combine_search(retrievers: list[VectorStoreRetriever], query:str) -> list[Document]:
        """Run ``query`` on every retriever concurrently and merge the results.

        Args:
            retrievers: Retrievers to query.
            query: Query text.

        Returns:
            All retrieved documents in retriever order, keeping only the first
            document for each distinct ``page_content``.
        """
        search_results = await asyncio.gather(*(retriever.ainvoke(query) for retriever in retrievers))
        # dicts keep insertion order, so the first occurrence of each content wins.
        unique_docs: dict[str, Document] = {}
        for docs in search_results:
            for doc in docs:
                unique_docs.setdefault(doc.page_content, doc)
        return list(unique_docs.values())

    @staticmethod
    async def invoke(folder_path:str,api_key:SecretStr, query:str, search_type: str = "similarity", search_kwargs: dict[str,Any] | None = None) -> list[Document]:
        """Search the index stored in ``folder_path``.

        Args:
            folder_path: Directory containing a saved index.
            api_key: OpenAI API key for the embedding model.
            query: Query text.
            search_type: Retriever search type, or ``"mixed"`` to combine a
                similarity search with an MMR search and de-duplicate.
            search_kwargs: Search options. For ``"mixed"`` the MMR search uses
                ``k`` (default 4), ``fetch_k`` (default 20) and ``lambda_mult``
                (default 0.2; 0 = max diversity, 1 = max similarity).

        Returns:
            The matching documents.
        """
        # Work on a copy: the caller's dict must not be modified by a search.
        base_kwargs = dict(search_kwargs or {})
        if search_type == "mixed":
            similarity_retriever = FaissHelper.get_retriever(folder_path,api_key,"similarity",base_kwargs)
            mmr_kwargs = {
                **base_kwargs,
                "k": base_kwargs.get("k",4),
                "fetch_k": base_kwargs.get("fetch_k",20),
                "lambda_mult": base_kwargs.get("lambda_mult", 0.2),
            }
            mmr_retriever = FaissHelper.get_retriever(folder_path,api_key,"mmr",mmr_kwargs)
            return await FaissHelper._combine_search([similarity_retriever, mmr_retriever], query)
        return await FaissHelper.get_retriever(folder_path,api_key,search_type,base_kwargs).ainvoke(query)
|
|
127
|
-
|
|
File without changes
|
|
File without changes
|