ws-bom-robot-app 0.0.33__py3-none-any.whl → 0.0.35__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. ws_bom_robot_app/config.py +10 -1
  2. ws_bom_robot_app/llm/agent_description.py +123 -124
  3. ws_bom_robot_app/llm/agent_handler.py +180 -167
  4. ws_bom_robot_app/llm/agent_lcel.py +54 -64
  5. ws_bom_robot_app/llm/api.py +33 -21
  6. ws_bom_robot_app/llm/defaut_prompt.py +15 -9
  7. ws_bom_robot_app/llm/main.py +109 -102
  8. ws_bom_robot_app/llm/models/api.py +55 -7
  9. ws_bom_robot_app/llm/models/kb.py +11 -2
  10. ws_bom_robot_app/llm/providers/__init__.py +0 -0
  11. ws_bom_robot_app/llm/providers/llm_manager.py +174 -0
  12. ws_bom_robot_app/llm/settings.py +4 -4
  13. ws_bom_robot_app/llm/tools/models/main.py +5 -3
  14. ws_bom_robot_app/llm/tools/tool_builder.py +23 -19
  15. ws_bom_robot_app/llm/tools/tool_manager.py +133 -101
  16. ws_bom_robot_app/llm/tools/utils.py +25 -25
  17. ws_bom_robot_app/llm/utils/agent_utils.py +17 -16
  18. ws_bom_robot_app/llm/utils/download.py +79 -79
  19. ws_bom_robot_app/llm/utils/print.py +29 -29
  20. ws_bom_robot_app/llm/utils/secrets.py +26 -0
  21. ws_bom_robot_app/llm/vector_store/generator.py +137 -137
  22. ws_bom_robot_app/llm/vector_store/integration/base.py +12 -1
  23. ws_bom_robot_app/llm/vector_store/loader/base.py +6 -5
  24. ws_bom_robot_app/llm/vector_store/loader/docling.py +27 -6
  25. ws_bom_robot_app/llm/vector_store/loader/json_loader.py +25 -25
  26. ws_bom_robot_app/main.py +7 -2
  27. {ws_bom_robot_app-0.0.33.dist-info → ws_bom_robot_app-0.0.35.dist-info}/METADATA +25 -12
  28. {ws_bom_robot_app-0.0.33.dist-info → ws_bom_robot_app-0.0.35.dist-info}/RECORD +30 -28
  29. ws_bom_robot_app/llm/utils/faiss_helper.py +0 -127
  30. {ws_bom_robot_app-0.0.33.dist-info → ws_bom_robot_app-0.0.35.dist-info}/WHEEL +0 -0
  31. {ws_bom_robot_app-0.0.33.dist-info → ws_bom_robot_app-0.0.35.dist-info}/top_level.txt +0 -0
@@ -1,25 +1,25 @@
1
- import json
2
- from typing import Optional
3
- from langchain_core.documents import Document
4
- from langchain_community.document_loaders.base import BaseLoader
5
-
6
- class JsonLoader(BaseLoader):
7
- def __init__(self, file_path: str, meta_fields:Optional[list[str]] = [],encoding: Optional[str] = "utf-8"):
8
- self.file_path = file_path
9
- self.meta_fields = meta_fields
10
- self.encoding = encoding
11
-
12
- def load(self) -> list[Document]:
13
- with open(self.file_path, "r", encoding=self.encoding) as file:
14
- data = json.load(file)
15
- _list = data if isinstance(data, list) else [data]
16
- return [
17
- Document(
18
- page_content=json.dumps(item),
19
- metadata={
20
- "source": self.file_path,
21
- **{field: item.get(field) for field in self.meta_fields if item.get(field)}
22
- }
23
- )
24
- for item in _list
25
- ]
1
+ import json
2
+ from typing import Optional
3
+ from langchain_core.documents import Document
4
+ from langchain_community.document_loaders.base import BaseLoader
5
+
6
+ class JsonLoader(BaseLoader):
7
+ def __init__(self, file_path: str, meta_fields:Optional[list[str]] = [],encoding: Optional[str] = "utf-8"):
8
+ self.file_path = file_path
9
+ self.meta_fields = meta_fields
10
+ self.encoding = encoding
11
+
12
+ def load(self) -> list[Document]:
13
+ with open(self.file_path, "r", encoding=self.encoding) as file:
14
+ data = json.load(file)
15
+ _list = data if isinstance(data, list) else [data]
16
+ return [
17
+ Document(
18
+ page_content=json.dumps(item),
19
+ metadata={
20
+ "source": self.file_path,
21
+ **{field: item.get(field) for field in self.meta_fields if item.get(field)}
22
+ }
23
+ )
24
+ for item in _list
25
+ ]
ws_bom_robot_app/main.py CHANGED
@@ -71,7 +71,9 @@ def __get_disk_info():
71
71
  @app.get("/api/diag",tags=["diag"])
72
72
  def diag(authenticate: bool = Depends(authenticate)):
73
73
  import pkg_resources, psutil
74
- from ws_bom_robot_app.llm.vector_store.loader.base import Loader as wsll
74
+ from ws_bom_robot_app.llm.providers.llm_manager import LlmManager as wsllm
75
+ from ws_bom_robot_app.llm.vector_store.db.manager import VectorDbManager as wsdb
76
+ from ws_bom_robot_app.llm.vector_store.loader.base import Loader as wsldr
75
77
  from ws_bom_robot_app.llm.vector_store.integration.manager import IntegrationManager as wsim
76
78
  from ws_bom_robot_app.llm.tools.tool_manager import ToolManager as wstm
77
79
  from ws_bom_robot_app.llm.agent_description import AgentDescriptor as wsad
@@ -90,6 +92,7 @@ def diag(authenticate: bool = Depends(authenticate)):
90
92
  "platform": {
91
93
  "node": platform.node(),
92
94
  "system": platform.system(),
95
+ "platform": platform.platform(),
93
96
  "version": platform.version(),
94
97
  "type": platform.machine(),
95
98
  "processor": platform.processor(),
@@ -132,7 +135,9 @@ def diag(authenticate: bool = Depends(authenticate)):
132
135
  "config":config,
133
136
  "runtime":config.runtime_options(),
134
137
  "extension": {
135
- "loader": ({item[0]: item[1].loader.__name__ if item[1] else None} for item in sorted(wsll._list.items(), key=lambda x: x[0]) if item[1]),
138
+ "provider":({item[0]: type(item[1]).__name__} for item in wsllm._list.items()),
139
+ "db":({item[0]: type(item[1]).__name__} for item in wsdb._list.items()),
140
+ "loader": ({item[0]: item[1].loader.__name__ if item[1] else None} for item in sorted(wsldr._list.items(), key=lambda x: x[0]) if item[1]),
136
141
  "integration":({item[0]: type(item[1]).__name__} for item in wsim._list.items()),
137
142
  "tool": ({item[0]: item[1].function.__name__} for item in wstm._list.items()),
138
143
  "agent":({item[0]: type(item[1]).__name__} for item in wsad._list.items())
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: ws_bom_robot_app
3
- Version: 0.0.33
3
+ Version: 0.0.35
4
4
  Summary: A FastAPI application serving ws bom/robot/llm platform ai.
5
5
  Home-page: https://github.com/websolutespa/bom
6
6
  Author: Websolute Spa
@@ -13,17 +13,25 @@ Description-Content-Type: text/markdown
13
13
  Requires-Dist: standardwebhooks==1.0.0
14
14
  Requires-Dist: apscheduler==3.11.0
15
15
  Requires-Dist: aiofiles==24.1.0
16
- Requires-Dist: pydantic==2.10.5
16
+ Requires-Dist: pydantic==2.10.6
17
17
  Requires-Dist: pydantic-settings==2.7.1
18
- Requires-Dist: fastapi[standard]==0.115.6
19
- Requires-Dist: langchain==0.3.14
20
- Requires-Dist: langchain-openai==0.3.0
21
- Requires-Dist: langchain-community==0.3.14
22
- Requires-Dist: langchain-core==0.3.29
18
+ Requires-Dist: fastapi[standard]==0.115.8
19
+ Requires-Dist: langchain==0.3.18
20
+ Requires-Dist: langchain-community==0.3.17
21
+ Requires-Dist: langchain-core==0.3.34
22
+ Requires-Dist: langchain-openai==0.3.5
23
+ Requires-Dist: langchain-anthropic==0.3.6
24
+ Requires-Dist: langchain-google-genai==2.0.7
25
+ Requires-Dist: langchain-google-vertexai==2.0.13
23
26
  Requires-Dist: faiss-cpu==1.9.0
24
- Requires-Dist: unstructured==0.16.13
27
+ Requires-Dist: chromadb==0.6.3
28
+ Requires-Dist: langchain_chroma==0.2.1
29
+ Requires-Dist: fastembed==0.5.1
30
+ Requires-Dist: langchain-qdrant==0.2.0
31
+ Requires-Dist: lark==1.2.2
32
+ Requires-Dist: unstructured==0.16.17
25
33
  Requires-Dist: unstructured[image]
26
- Requires-Dist: unstructured-ingest==0.3.14
34
+ Requires-Dist: unstructured-ingest==0.4.6
27
35
  Requires-Dist: unstructured-ingest[azure]
28
36
  Requires-Dist: unstructured-ingest[confluence]
29
37
  Requires-Dist: unstructured-ingest[dropbox]
@@ -185,6 +193,7 @@ py -m pip install --upgrade setuptools build twine streamlit
185
193
  ### 🪛 build
186
194
 
187
195
  ```pwsh
196
+ if (Test-Path ./dist) {rm ./dist -r -force}; `
188
197
  py -m build && twine check dist/*
189
198
  ```
190
199
 
@@ -217,14 +226,18 @@ prospector ./ws_bom_robot_app -t pyroma
217
226
  lauch pytest
218
227
 
219
228
  ```pwsh
220
- !py -m pip install -U pytest pytest-asyncio pytest-mock pytest-cov
229
+ !py -m pip install -U pytest pytest-asyncio pytest-mock pytest-cov pyclean
230
+ # clean cache if needed
231
+ # pyclean --verbose .
221
232
  pytest --cov=ws_bom_robot_app --log-cli-level=info
233
+ # directory
234
+ # pytest --cov=ws_bom_robot_app --log-cli-level=info ./tests/app/llm/vector_store/db
222
235
  ```
223
236
 
224
237
  launch debugger
225
238
 
226
239
  ```pwsh
227
- streamlit run debugger.py --server.port 6002
240
+ streamlit run debugger.py --server.port 6011
228
241
  ```
229
242
 
230
243
  dockerize base image
@@ -242,7 +255,7 @@ dockerize app from src
242
255
 
243
256
  ```pwsh
244
257
  docker build -f Dockerfile-src -t ws-bom-robot-app:src .
245
- docker run --name ws-bom-robot-app-src -d -v "$(pwd)/ws_bom_robot_app:/app/ws_bom_robot_app" -v "$(pwd)/.data:/app/.data" -v "$(pwd)/tests:/app/tests" -v "$(pwd)/tmp:/tmp" -p 6001:6001 ws-bom-robot-app:src
258
+ docker run --name ws-bom-robot-app-src -d -v "$(pwd)/ws_bom_robot_app:/app/ws_bom_robot_app" -v "$(pwd)/.data:/app/.data" -v "$(pwd)/tests:/app/tests" -v "$(pwd)/tmp:/tmp" -p 6002:6001 ws-bom-robot-app:src
246
259
  ```
247
260
 
248
261
  ### ✈️ publish
@@ -1,41 +1,43 @@
1
1
  ws_bom_robot_app/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  ws_bom_robot_app/auth.py,sha256=84nIbmJsMrNs0sxIQGEHbjsjc2P6ZrZZGSn8dkiL6is,895
3
- ws_bom_robot_app/config.py,sha256=c4ybGaEwEL5d3od2ebAwNGJ4gtEMtvKNqZytAfdDxMY,3118
3
+ ws_bom_robot_app/config.py,sha256=zjqr_tx8tkxZ94YXrZ-xQFZiM5es3mcQlZG1VEnj0ac,3635
4
4
  ws_bom_robot_app/cron_manager.py,sha256=0Yt5AMTPGlXZ_M5ck0SKMX8wvzoPsseEezg_s0Q3HKY,9224
5
- ws_bom_robot_app/main.py,sha256=vChP8vfmOCbs51TPUsaaxX8FvoFXuURMkOgmgx0Xi_4,6121
5
+ ws_bom_robot_app/main.py,sha256=zO3B-v-v9ESASvw8IaQj9Y9hNvNmOxohFmA0R82EybQ,6518
6
6
  ws_bom_robot_app/task_manager.py,sha256=Zedzs2R3O-wNSQOqs4jorgFwPRi-ji_0TN4mGfk-VvE,15958
7
7
  ws_bom_robot_app/util.py,sha256=b49ItlZgh2Wzw-6K8k5Wa44eVgjQ0JmWQwJnEaQBVGw,3502
8
8
  ws_bom_robot_app/llm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
- ws_bom_robot_app/llm/agent_description.py,sha256=80g9LKRb0cHDSvwlr9MbtUCkaD9Yh6Zm-wR11ak21KM,4829
10
- ws_bom_robot_app/llm/agent_handler.py,sha256=7b-H6PCkeFt4gDFv4oXO_Mg6A59MepWm0qwosvjgaw0,5975
11
- ws_bom_robot_app/llm/agent_lcel.py,sha256=O4FgzvKXuIP9VJgSBQtP26Xifx1r-fydD0LtlqUYzug,2730
12
- ws_bom_robot_app/llm/api.py,sha256=vBu_TFTlBjp7e3J-WmlZbXn_TbB550x-NpQN4YsO7To,3004
13
- ws_bom_robot_app/llm/defaut_prompt.py,sha256=CDsM6I6vOTklOKD1FK0v93P4TLPjdq7iCQ7wszCs6yE,765
14
- ws_bom_robot_app/llm/main.py,sha256=OgjWceJca1d3zd9TJHfdalcXAZMCyr8aO6VZGuqLWsY,4037
15
- ws_bom_robot_app/llm/settings.py,sha256=DCLaGZwxlw0xE46LpfUgin_FHD8_XJIthCgI6r2UDlM,121
9
+ ws_bom_robot_app/llm/agent_description.py,sha256=XLrDcUk-4OtBFPv1Yscxga3ETHNnYCIC1uxHtWD94B8,4663
10
+ ws_bom_robot_app/llm/agent_handler.py,sha256=4zdpSf5iVLxMZ90c_vUl_k-O9SF6u_h7GOB24y4mhIo,6435
11
+ ws_bom_robot_app/llm/agent_lcel.py,sha256=BUfGVUcw6s_YAu5aPMkqqjsStYNHUXKh31t_Ybx11-A,2395
12
+ ws_bom_robot_app/llm/api.py,sha256=UaD1oJyAOe7ASoXxPNJcth3kDuWcjk1xqUNEjuPWbR4,3759
13
+ ws_bom_robot_app/llm/defaut_prompt.py,sha256=LlCd_nSMkMmHESfiiiQYfnJyB6Pp-LSs4CEKdYW4vFk,1106
14
+ ws_bom_robot_app/llm/main.py,sha256=Qw4SbZtstXo31bWFLRQmm_t8BEKlqvgXmtYbxM2F6_Y,4138
15
+ ws_bom_robot_app/llm/settings.py,sha256=EkFGCppORenStH9W4e6_dYvQ-5p6xiEMpmUHBqNqG9M,117
16
16
  ws_bom_robot_app/llm/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
- ws_bom_robot_app/llm/models/api.py,sha256=ebyzuXcNNan8xzR94SfediHMJ5PymY4GoPYlfFQPdq4,6583
17
+ ws_bom_robot_app/llm/models/api.py,sha256=mLbPG7jHh1EjgQG-xpBhEgiTIHpK35HZ51obgqQSfq4,8890
18
18
  ws_bom_robot_app/llm/models/base.py,sha256=1TqxuTK3rjJEALn7lvgoen_1ba3R2brAgGx6EDTtDZo,152
19
- ws_bom_robot_app/llm/models/kb.py,sha256=9zqwDlVULVrWE48wo5AivzWoOtnjA57k9rsw8KNnyDk,8935
19
+ ws_bom_robot_app/llm/models/kb.py,sha256=oVSw6_dmNxikAHrPqcfxDXz9M0ezLIYuxpgvzfs_Now,9514
20
+ ws_bom_robot_app/llm/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
+ ws_bom_robot_app/llm/providers/llm_manager.py,sha256=ONPeIkMracB3R4UScjgb1s5GZm29kAWOIzT0T8cjvvo,6179
20
22
  ws_bom_robot_app/llm/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
- ws_bom_robot_app/llm/tools/tool_builder.py,sha256=sdnyiqXU-6fxcIUu1PvTjJHaOpdIZz-b5hsjIlp-RaY,920
22
- ws_bom_robot_app/llm/tools/tool_manager.py,sha256=-vBTL89S-KMxCitTUT15qN-jCikX2wbIjduHNJe0rQM,4596
23
- ws_bom_robot_app/llm/tools/utils.py,sha256=gO0YLqF3hhfsmoY_B-K940jZO9aJHAuWvykONmkKHHQ,1332
23
+ ws_bom_robot_app/llm/tools/tool_builder.py,sha256=OaA0jReNUpjfe7c8TVLM86acQ4w0cQaR3NE22hGKJb0,1165
24
+ ws_bom_robot_app/llm/tools/tool_manager.py,sha256=RZcJVPyWT9D3HUxSO1d5kSfTQtJB2CG5hocuFa01AzY,5816
25
+ ws_bom_robot_app/llm/tools/utils.py,sha256=SPC8pj2bt_xWO7wNR_5YBwUUvjJIK1xlavR4yfW4J-0,1320
24
26
  ws_bom_robot_app/llm/tools/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
25
- ws_bom_robot_app/llm/tools/models/main.py,sha256=LsOJ7vkcSzYLoE1oa3TG0Rs0pr9J5VS_e4li6aDx_fw,260
27
+ ws_bom_robot_app/llm/tools/models/main.py,sha256=o3Rwbn5nsugKOgLG0FUIuvtPPHYhfVpqG4E3BQB2nWM,388
26
28
  ws_bom_robot_app/llm/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
27
- ws_bom_robot_app/llm/utils/agent_utils.py,sha256=t8uWmszBo7PFjjGqvhzNtBrHD44WQUYuKnbs2ee0zSo,830
29
+ ws_bom_robot_app/llm/utils/agent_utils.py,sha256=9fdnVMHpSEBdmYef6NFXtMIXa8EB4QQICfXsXDiafHg,923
28
30
  ws_bom_robot_app/llm/utils/chunker.py,sha256=N7570xBYlObneg-fsvDhPAJ-Pv8C8OaYZOBK6q7LmMI,607
29
- ws_bom_robot_app/llm/utils/download.py,sha256=GaRypPgkx16HfYRj-upX9kvmjfAdFFb5TP4P97scWeA,3273
30
- ws_bom_robot_app/llm/utils/faiss_helper.py,sha256=VikpopCpEzV1lN5JISDabpHcIUkNDACNL52KliB4Hxs,5224
31
+ ws_bom_robot_app/llm/utils/download.py,sha256=iAUxH_NiCpTPtGzhC4hBtxotd2HPFt2MBhttslIxqiI,3194
31
32
  ws_bom_robot_app/llm/utils/kb.py,sha256=jja45WCbNI7SGEgqDS99nErlwB5eY8Ga7BMnhdMHZ90,1279
32
- ws_bom_robot_app/llm/utils/print.py,sha256=QH36yQW-rqFx0b9O0bojJ6HrnOBuOgwdiQNUDIeLTGw,828
33
+ ws_bom_robot_app/llm/utils/print.py,sha256=ZonoLPcfM6Cpw4_Ec455LiCovExOwvnIgvw1QORSCBY,799
34
+ ws_bom_robot_app/llm/utils/secrets.py,sha256=-HtqLIDVIJrpvGC5YhPAVyLsq8P4ChVM5g3GOfdwqVk,878
33
35
  ws_bom_robot_app/llm/utils/webhooks.py,sha256=LAAZqyN6VhV13wu4X-X85TwdDgAV2rNvIwQFIIc0FJM,2114
34
36
  ws_bom_robot_app/llm/vector_store/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
35
- ws_bom_robot_app/llm/vector_store/generator.py,sha256=Au6YrGdcn4ZkkamCFvSJhC44kUZ2qtT0mg2QbDPegEs,6329
37
+ ws_bom_robot_app/llm/vector_store/generator.py,sha256=9_xdtCKJhmt1OP0GXDjvFERXMP7ozLZT92KuYEBDgC0,6314
36
38
  ws_bom_robot_app/llm/vector_store/integration/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
37
39
  ws_bom_robot_app/llm/vector_store/integration/azure.py,sha256=R37TaPQP-HJJJiaKE9rmMc9kpeXeRvdebbTY_982om0,3392
38
- ws_bom_robot_app/llm/vector_store/integration/base.py,sha256=IvIu8RkISuurrVKr2YPG96fsOI2kqhaEGyTGzjB-jCI,1550
40
+ ws_bom_robot_app/llm/vector_store/integration/base.py,sha256=8r6XO_XM8PcDXhsKst6q_Xw-P48rCiEtowmkBEDVd08,1957
39
41
  ws_bom_robot_app/llm/vector_store/integration/confluence.py,sha256=4fiRHB3J-SHZZxNGHwVkCrT-xSPbc91z4WrDE9fy6xU,2505
40
42
  ws_bom_robot_app/llm/vector_store/integration/dropbox.py,sha256=yhGvHTN0TEpUfhdvvV7RX5MxBwTUyddAX95Fgqp3mCg,2629
41
43
  ws_bom_robot_app/llm/vector_store/integration/gcs.py,sha256=fFDVDUR6eNB7FVTzDSEpMHFEWMgG16GLnpSf_mqGDdE,3184
@@ -49,10 +51,10 @@ ws_bom_robot_app/llm/vector_store/integration/sharepoint.py,sha256=zqqn-6qPrK50P
49
51
  ws_bom_robot_app/llm/vector_store/integration/sitemap.py,sha256=4WYj3C6Y_4vkGs5iUNR59l1YOZEDsQT8MnZ5rIYDL_k,4733
50
52
  ws_bom_robot_app/llm/vector_store/integration/slack.py,sha256=FMjESXm2QetFXI6i8epze7Kbbu22fV8CVaxb71AHnJ8,2572
51
53
  ws_bom_robot_app/llm/vector_store/loader/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
52
- ws_bom_robot_app/llm/vector_store/loader/base.py,sha256=Bv3r5YYAjLHp4sU_sxTk6-OmUdEgoVDqKL-xgWD9k_s,5240
53
- ws_bom_robot_app/llm/vector_store/loader/docling.py,sha256=12sMSH8DkEsC1Ctml2EIX2gs1BDnWWdynUEqGv-JAF4,2114
54
- ws_bom_robot_app/llm/vector_store/loader/json_loader.py,sha256=LDppW0ZATo4_1hh-KlsAM3TLawBvwBxva_a7k5Oz1sc,858
55
- ws_bom_robot_app-0.0.33.dist-info/METADATA,sha256=wOTdo7f2veSa9du-V-j-cM2ySyZcLIMVe1eQIpiOiO4,7756
56
- ws_bom_robot_app-0.0.33.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
57
- ws_bom_robot_app-0.0.33.dist-info/top_level.txt,sha256=Yl0akyHVbynsBX_N7wx3H3ZTkcMLjYyLJs5zBMDAKcM,17
58
- ws_bom_robot_app-0.0.33.dist-info/RECORD,,
54
+ ws_bom_robot_app/llm/vector_store/loader/base.py,sha256=L_ugekNuAq0N9O-24wtlHSNHkqSeD-KsJrfGt_FX9Oc,5340
55
+ ws_bom_robot_app/llm/vector_store/loader/docling.py,sha256=yP0zgXLeFAlByaYuj-6cYariuknckrFds0dxdRcnVz8,3456
56
+ ws_bom_robot_app/llm/vector_store/loader/json_loader.py,sha256=qo9ejRZyKv_k6jnGgXnu1W5uqsMMtgqK_uvPpZQ0p74,833
57
+ ws_bom_robot_app-0.0.35.dist-info/METADATA,sha256=gwxWX98wtRsmod8DpysLX38QBX_Rl0MHdyCnclliUTs,8270
58
+ ws_bom_robot_app-0.0.35.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
59
+ ws_bom_robot_app-0.0.35.dist-info/top_level.txt,sha256=Yl0akyHVbynsBX_N7wx3H3ZTkcMLjYyLJs5zBMDAKcM,17
60
+ ws_bom_robot_app-0.0.35.dist-info/RECORD,,
@@ -1,127 +0,0 @@
1
- from langchain_community.vectorstores.faiss import FAISS
2
- from langchain_core.documents import Document
3
- from langchain_core.vectorstores.base import VectorStoreRetriever
4
- from langchain_openai import OpenAIEmbeddings
5
- from typing import Any
6
- import asyncio, gc, logging
7
- from langchain_text_splitters import CharacterTextSplitter
8
- from pydantic import SecretStr
9
-
10
- from ws_bom_robot_app.llm.utils.chunker import DocumentChunker
11
-
12
- class FaissHelper():
13
- _embedding_model = "text-embedding-3-small"
14
- _CACHE: dict[str, FAISS] = {}
15
-
16
- @staticmethod
17
- #@timer
18
- async def create(documents: list[Document], folder_path: str, api_key: SecretStr, return_folder_path:bool = False) -> str | None:
19
- try:
20
- embeddings = OpenAIEmbeddings(api_key=api_key, model=FaissHelper._embedding_model)
21
- faiss_instance = await asyncio.to_thread(
22
- FAISS.from_documents,
23
- DocumentChunker.chunk(documents),
24
- embeddings)
25
- await asyncio.to_thread(faiss_instance.save_local, folder_path)
26
- del faiss_instance, embeddings
27
- gc.collect()
28
- if return_folder_path:
29
- return folder_path
30
- return None
31
- except Exception as e:
32
- logging.error(f"Failed to create Faiss instance: {e}")
33
- return None
34
- finally:
35
- if 'documents' in locals():
36
- del documents
37
- gc.collect()
38
-
39
- @staticmethod
40
- #@timer
41
- def get_loader(folder_path:str,api_key:SecretStr) -> FAISS:
42
- """_summary_
43
-
44
- Args:
45
- folder_path (str): _description_
46
- api_key (str): _description_
47
-
48
- Returns:
49
- FAISS: _description_
50
- """
51
- if not folder_path in FaissHelper._CACHE:
52
- _faiss = FAISS.load_local(
53
- folder_path=folder_path,
54
- embeddings=OpenAIEmbeddings(api_key=api_key, model=FaissHelper._embedding_model),
55
- allow_dangerous_deserialization=True
56
- )
57
- FaissHelper._CACHE[folder_path] = _faiss
58
- return FaissHelper._CACHE[folder_path]
59
-
60
- @staticmethod
61
- #@timer
62
- def get_retriever(folder_path:str,api_key:SecretStr,search_type=str, search_kwargs= dict[str,Any]) -> VectorStoreRetriever:
63
- """_summary_
64
-
65
- Args:
66
- folder_path (str): _description_
67
- api_key (str): _description_
68
-
69
- Returns:
70
- VectorStoreRetriever: _description_
71
- """
72
- _faiss = FaissHelper.get_loader(folder_path,api_key)
73
- return _faiss.as_retriever(search_type=search_type, search_kwargs=search_kwargs)
74
- @staticmethod
75
- #@atimer
76
- async def _combine_search(retrievers: list[VectorStoreRetriever], query:str) -> list[Document]:
77
- """_summary_
78
-
79
- Args:
80
- list(VectorStoreRetriever): _description_
81
-
82
- Returns:
83
- list[Document]: _description_
84
- """
85
- def _remove_duplicates(docs: list[Document]) -> list[Document]:
86
- """Remove duplicate documents based on content"""
87
- seen_contents = set()
88
- unique_docs = []
89
- for doc in docs:
90
- if doc.page_content not in seen_contents:
91
- seen_contents.add(doc.page_content)
92
- unique_docs.append(doc)
93
- return unique_docs
94
- # Perform the searches concurrently
95
- search_tasks = [retriever.ainvoke(query) for retriever in retrievers]
96
- search_results = await asyncio.gather(*search_tasks)
97
- # Combine and de-duplicate the results
98
- all_docs = _remove_duplicates([doc for docs in search_results for doc in docs])
99
- return all_docs
100
- @staticmethod
101
- #@atimer
102
- async def invoke(folder_path:str,api_key:SecretStr, query:str, search_type=str, search_kwargs= dict[str,Any]) -> list[Document]:
103
- """_summary_
104
- Args:
105
- folder_path (str): _description_
106
- api_key (str): _description_
107
- query (str): _description_
108
- search_type (str): _description_
109
- search_kwargs (dict[str,Any]): _description_
110
- k: Number of documents to retrieve
111
- fetch_k: Number of documents to fetch for MMR selection (if None, defaults to 2 * k)
112
- lambda_mult: MMR diversity parameter (0 = max diversity, 1 = max similarity)
113
- Returns:
114
- list[Document]: _description_
115
- """
116
- if (search_type == "mixed"):
117
- similarity_retriever = FaissHelper.get_retriever(folder_path,api_key,"similarity",search_kwargs) # type: ignore
118
- mmr_kwargs = {
119
- "k": search_kwargs.get("k",4), # type: ignore
120
- "fetch_k": search_kwargs.get("fetch_k",20), #type: ignore
121
- "lambda_mult": search_kwargs.get("lambda_mult", 0.2), # type: ignore
122
- }
123
- search_kwargs.update(mmr_kwargs)
124
- mmr_retriever = FaissHelper.get_retriever(folder_path,api_key,"mmr",search_kwargs) # type: ignore
125
- return await FaissHelper._combine_search([similarity_retriever, mmr_retriever], query)
126
- return await FaissHelper.get_retriever(folder_path,api_key,search_type,search_kwargs).ainvoke(query)
127
-