ws-bom-robot-app 0.0.22__tar.gz → 0.0.24__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/PKG-INFO +8 -1
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/setup.py +1 -1
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/agent_handler.py +14 -15
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/api.py +0 -8
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/main.py +16 -20
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/tools/tool_builder.py +3 -2
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/tools/tool_manager.py +5 -1
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/tools/utils.py +4 -5
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/utils/print.py +7 -7
- ws_bom_robot_app-0.0.24/ws_bom_robot_app/llm/vector_store/integration/azure.py +62 -0
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/vector_store/integration/base.py +3 -3
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/vector_store/integration/confluence.py +13 -1
- ws_bom_robot_app-0.0.24/ws_bom_robot_app/llm/vector_store/integration/dropbox.py +53 -0
- ws_bom_robot_app-0.0.24/ws_bom_robot_app/llm/vector_store/integration/gcs.py +62 -0
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/vector_store/integration/github.py +9 -0
- ws_bom_robot_app-0.0.24/ws_bom_robot_app/llm/vector_store/integration/googledrive.py +69 -0
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/vector_store/integration/jira.py +11 -0
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/vector_store/integration/manager.py +17 -2
- ws_bom_robot_app-0.0.24/ws_bom_robot_app/llm/vector_store/integration/s3.py +64 -0
- ws_bom_robot_app-0.0.24/ws_bom_robot_app/llm/vector_store/integration/sftp.py +64 -0
- ws_bom_robot_app-0.0.24/ws_bom_robot_app/llm/vector_store/integration/sharepoint.py +51 -0
- ws_bom_robot_app-0.0.24/ws_bom_robot_app/llm/vector_store/integration/slack.py +57 -0
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/vector_store/loader/base.py +5 -2
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/requirements.txt +7 -0
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/task_manager.py +1 -1
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app.egg-info/PKG-INFO +8 -1
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app.egg-info/SOURCES.txt +8 -0
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app.egg-info/requires.txt +7 -0
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/README.md +0 -0
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/pyproject.toml +0 -0
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/setup.cfg +0 -0
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/__init__.py +0 -0
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/auth.py +0 -0
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/config.py +0 -0
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/cron_manager.py +0 -0
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/__init__.py +0 -0
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/agent_description.py +0 -0
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/agent_lcel.py +0 -0
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/defaut_prompt.py +0 -0
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/models/__init__.py +0 -0
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/models/api.py +0 -0
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/models/base.py +0 -0
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/models/kb.py +0 -0
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/settings.py +0 -0
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/tools/__init__.py +0 -0
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/tools/models/__init__.py +0 -0
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/tools/models/main.py +0 -0
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/utils/__init__.py +0 -0
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/utils/agent_utils.py +0 -0
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/utils/download.py +0 -0
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/utils/faiss_helper.py +0 -0
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/utils/kb.py +0 -0
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/utils/webhooks.py +0 -0
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/vector_store/__init__.py +0 -0
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/vector_store/generator.py +0 -0
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/vector_store/integration/__init__.py +0 -0
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/vector_store/integration/sitemap.py +0 -0
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/vector_store/loader/__init__.py +0 -0
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/vector_store/loader/json_loader.py +0 -0
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/main.py +0 -0
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/util.py +0 -0
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app.egg-info/dependency_links.txt +0 -0
- {ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: ws_bom_robot_app
|
|
3
|
-
Version: 0.0.22
|
|
3
|
+
Version: 0.0.24
|
|
4
4
|
Summary: A FastAPI application serving ws bom/robot/llm platform ai.
|
|
5
5
|
Home-page: https://github.com/websolutespa/bom
|
|
6
6
|
Author: Websolute Spa
|
|
@@ -26,9 +26,16 @@ Requires-Dist: opencv-python-headless==4.10.0.84
|
|
|
26
26
|
Requires-Dist: unstructured[all-docs]==0.16.11
|
|
27
27
|
Requires-Dist: langchain_unstructured==0.1.5
|
|
28
28
|
Requires-Dist: unstructured-ingest==0.3.8
|
|
29
|
+
Requires-Dist: unstructured-ingest[azure]
|
|
29
30
|
Requires-Dist: unstructured-ingest[confluence]
|
|
31
|
+
Requires-Dist: unstructured-ingest[dropbox]
|
|
32
|
+
Requires-Dist: unstructured-ingest[gcs]
|
|
30
33
|
Requires-Dist: unstructured-ingest[github]
|
|
34
|
+
Requires-Dist: unstructured-ingest[google_drive]
|
|
31
35
|
Requires-Dist: unstructured-ingest[jira]
|
|
36
|
+
Requires-Dist: unstructured-ingest[s3]
|
|
37
|
+
Requires-Dist: unstructured-ingest[sftp]
|
|
38
|
+
Requires-Dist: unstructured-ingest[slack]
|
|
32
39
|
Requires-Dist: html5lib==1.1
|
|
33
40
|
Requires-Dist: markdownify==0.14.1
|
|
34
41
|
Requires-Dist: nebuly==0.3.33
|
|
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
|
|
|
2
2
|
|
|
3
3
|
setup(
|
|
4
4
|
name="ws_bom_robot_app",
|
|
5
|
-
version="0.0.22",
|
|
5
|
+
version="0.0.24",
|
|
6
6
|
description="A FastAPI application serving ws bom/robot/llm platform ai.",
|
|
7
7
|
long_description=open("README.md", encoding='utf-8').read(),
|
|
8
8
|
long_description_content_type="text/markdown",
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
from asyncio import Queue
|
|
1
2
|
from langchain_core.agents import AgentFinish
|
|
2
3
|
from langchain_core.outputs import ChatGenerationChunk, GenerationChunk
|
|
3
4
|
from langchain.callbacks.base import AsyncCallbackHandler
|
|
@@ -16,13 +17,13 @@ import json
|
|
|
16
17
|
|
|
17
18
|
class AgentHandler(AsyncCallbackHandler):
|
|
18
19
|
|
|
19
|
-
def __init__(self, threadId) -> None:
|
|
20
|
+
def __init__(self, queue: Queue, threadId: str = None) -> None:
|
|
20
21
|
super().__init__()
|
|
21
22
|
self._threadId = threadId
|
|
22
|
-
self.json_cards = None
|
|
23
23
|
self.json_block = ""
|
|
24
24
|
self.is_json_block = False
|
|
25
25
|
self.backtick_count = 0 # Conteggio dei backticks per il controllo accurato
|
|
26
|
+
self.queue = queue
|
|
26
27
|
|
|
27
28
|
async def on_llm_start(
|
|
28
29
|
self,
|
|
@@ -39,7 +40,7 @@ class AgentHandler(AsyncCallbackHandler):
|
|
|
39
40
|
"type": "info",
|
|
40
41
|
"threadId": self._threadId,
|
|
41
42
|
}
|
|
42
|
-
|
|
43
|
+
await self.queue.put(printString(firstChunk))
|
|
43
44
|
|
|
44
45
|
"""async def on_chat_model_start(self, serialized: Dict[str, Any], messages: List[List[BaseMessage]], *, run_id: UUID = None, parent_run_id = None, tags = None, metadata = None, **kwargs: Any) -> Any:
|
|
45
46
|
pass"""
|
|
@@ -75,7 +76,7 @@ class AgentHandler(AsyncCallbackHandler):
|
|
|
75
76
|
elif self.is_json_block:
|
|
76
77
|
self.json_block += token
|
|
77
78
|
else:
|
|
78
|
-
printString(token)
|
|
79
|
+
await self.queue.put(printString(token))
|
|
79
80
|
pass
|
|
80
81
|
|
|
81
82
|
async def on_agent_finish(
|
|
@@ -92,12 +93,9 @@ class AgentHandler(AsyncCallbackHandler):
|
|
|
92
93
|
AIMessage(content=finish.return_values["output"]),
|
|
93
94
|
]
|
|
94
95
|
)
|
|
95
|
-
if self.json_cards:
|
|
96
|
-
for card in self.json_cards:
|
|
97
|
-
printJson(card)
|
|
98
|
-
self.json_cards = None
|
|
99
96
|
finalChunk = {"type": "end"}
|
|
100
|
-
printJson(finalChunk)
|
|
97
|
+
await self.queue.put(printJson(finalChunk))
|
|
98
|
+
await self.queue.put(None)
|
|
101
99
|
|
|
102
100
|
async def process_json_block(self, json_block: str):
|
|
103
101
|
"""Processa il blocco JSON completo."""
|
|
@@ -108,15 +106,16 @@ class AgentHandler(AsyncCallbackHandler):
|
|
|
108
106
|
try:
|
|
109
107
|
# Prova a fare il parsing del JSON
|
|
110
108
|
parsed_json = json.loads(json_block_clean)
|
|
111
|
-
printJson(parsed_json)
|
|
109
|
+
await self.queue.put(printJson(parsed_json))
|
|
112
110
|
except json.JSONDecodeError as e:
|
|
113
111
|
# Se il JSON è malformato, logga l'errore
|
|
114
112
|
raise e
|
|
115
113
|
|
|
116
114
|
class RawAgentHandler(AsyncCallbackHandler):
|
|
117
115
|
|
|
118
|
-
def __init__(self) -> None:
|
|
116
|
+
def __init__(self,queue: Queue) -> None:
|
|
119
117
|
super().__init__()
|
|
118
|
+
self.queue = queue
|
|
120
119
|
|
|
121
120
|
async def on_llm_start(
|
|
122
121
|
self,
|
|
@@ -147,10 +146,9 @@ class RawAgentHandler(AsyncCallbackHandler):
|
|
|
147
146
|
tags: Optional[List[str]] = None,
|
|
148
147
|
**kwargs: Any,
|
|
149
148
|
) -> None:
|
|
150
|
-
"""
|
|
151
|
-
if token
|
|
152
|
-
|
|
153
|
-
pass
|
|
149
|
+
"""Handles new tokens during streaming."""
|
|
150
|
+
if token: # Only process non-empty tokens
|
|
151
|
+
await self.queue.put(token)
|
|
154
152
|
|
|
155
153
|
async def on_agent_finish(
|
|
156
154
|
self,
|
|
@@ -166,3 +164,4 @@ class RawAgentHandler(AsyncCallbackHandler):
|
|
|
166
164
|
AIMessage(content=finish.return_values["output"]),
|
|
167
165
|
]
|
|
168
166
|
)
|
|
167
|
+
await self.queue.put(None)
|
|
@@ -25,18 +25,10 @@ async def _invoke(rq: InvokeRequest):
|
|
|
25
25
|
async def _stream(rq: StreamRequest) -> StreamingResponse:
|
|
26
26
|
return StreamingResponse(stream(rq), media_type="application/json")
|
|
27
27
|
|
|
28
|
-
@router.post("/stream/none")
|
|
29
|
-
async def _stream_none(rq: StreamRequest) -> None:
|
|
30
|
-
await stream_none(rq)
|
|
31
|
-
|
|
32
28
|
@router.post("/stream/raw")
|
|
33
29
|
async def _stream_raw(rq: StreamRequest) -> StreamingResponse:
|
|
34
30
|
return StreamingResponse(stream(rq, formatted=False), media_type="application/json")
|
|
35
31
|
|
|
36
|
-
@router.post("/stream/raw/none")
|
|
37
|
-
async def _stream_raw_none(rq: StreamRequest) -> None:
|
|
38
|
-
await stream_none(rq, formatted=False)
|
|
39
|
-
|
|
40
32
|
@router.post("/kb")
|
|
41
33
|
async def _kb(rq: KbRequest) -> VectorDbResponse:
|
|
42
34
|
return await kb(rq)
|
|
@@ -10,6 +10,7 @@ from nebuly.providers.langchain import LangChainTrackingHandler
|
|
|
10
10
|
from langchain_core.callbacks.base import AsyncCallbackHandler
|
|
11
11
|
import warnings, asyncio, os, io, sys, json
|
|
12
12
|
from typing import List
|
|
13
|
+
from asyncio import Queue
|
|
13
14
|
|
|
14
15
|
async def invoke(rq: InvokeRequest) -> str:
|
|
15
16
|
await rq.initialize()
|
|
@@ -22,12 +23,12 @@ async def invoke(rq: InvokeRequest) -> str:
|
|
|
22
23
|
result: AIMessage = await processor.run_agent(_msg)
|
|
23
24
|
return {"result": result.content}
|
|
24
25
|
|
|
25
|
-
async def __stream(rq: StreamRequest,formatted: bool = True) -> None:
|
|
26
|
+
async def __stream(rq: StreamRequest,queue: Queue,formatted: bool = True) -> None:
|
|
26
27
|
await rq.initialize()
|
|
27
28
|
if formatted:
|
|
28
|
-
agent_handler = AgentHandler(rq.thread_id)
|
|
29
|
+
agent_handler = AgentHandler(queue,rq.thread_id)
|
|
29
30
|
else:
|
|
30
|
-
agent_handler = RawAgentHandler()
|
|
31
|
+
agent_handler = RawAgentHandler(queue)
|
|
31
32
|
os.environ["AGENT_HANDLER_FORMATTED"] = str(formatted)
|
|
32
33
|
callbacks: List[AsyncCallbackHandler] = [agent_handler]
|
|
33
34
|
settings.init()
|
|
@@ -53,7 +54,7 @@ async def __stream(rq: StreamRequest,formatted: bool = True) -> None:
|
|
|
53
54
|
processor = AgentLcel(
|
|
54
55
|
openai_config={"api_key": rq.secrets["openAIApiKey"], "openai_model": rq.model, "temperature": rq.temperature},
|
|
55
56
|
sys_message=rq.system_message,
|
|
56
|
-
tools=get_structured_tools(tools=rq.app_tools, api_key=rq.secrets["openAIApiKey"], callbacks=[callbacks[0]]),
|
|
57
|
+
tools=get_structured_tools(tools=rq.app_tools, api_key=rq.secrets["openAIApiKey"], callbacks=[callbacks[0]], queue=queue),
|
|
57
58
|
rules=rq.rules
|
|
58
59
|
)
|
|
59
60
|
|
|
@@ -71,25 +72,20 @@ async def __stream(rq: StreamRequest,formatted: bool = True) -> None:
|
|
|
71
72
|
{"callbacks": callbacks},
|
|
72
73
|
)
|
|
73
74
|
|
|
75
|
+
# Signal the end of streaming
|
|
76
|
+
await queue.put(None)
|
|
77
|
+
|
|
74
78
|
async def stream(rq: StreamRequest,formatted:bool = True) -> AsyncGenerator[str, None]:
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
sys.stdout = sys_stdout
|
|
79
|
+
queue = Queue()
|
|
80
|
+
task = asyncio.create_task(__stream(rq, queue, formatted))
|
|
78
81
|
try:
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
yield output
|
|
85
|
-
sys_stdout.truncate(0)
|
|
86
|
-
sys_stdout.seek(0)
|
|
87
|
-
# capture any remaining output after the task completes
|
|
88
|
-
output = sys_stdout.getvalue()
|
|
89
|
-
if output:
|
|
90
|
-
yield output
|
|
82
|
+
while True:
|
|
83
|
+
token = await queue.get()
|
|
84
|
+
if token is None: # None indicates the end of streaming
|
|
85
|
+
break
|
|
86
|
+
yield token
|
|
91
87
|
finally:
|
|
92
|
-
|
|
88
|
+
await task
|
|
93
89
|
|
|
94
90
|
async def stream_none(rq: StreamRequest, formatted: bool = True) -> None:
|
|
95
91
|
await __stream(rq, formatted)
|
{ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/tools/tool_builder.py
RENAMED
|
@@ -1,12 +1,13 @@
|
|
|
1
|
+
from asyncio import Queue
|
|
1
2
|
from langchain.tools import StructuredTool
|
|
2
3
|
from ws_bom_robot_app.llm.models.api import LlmAppTool
|
|
3
4
|
from ws_bom_robot_app.llm.tools.tool_manager import ToolManager
|
|
4
5
|
|
|
5
|
-
def get_structured_tools(tools: list[LlmAppTool], api_key:str, callbacks:list) -> list[StructuredTool]:
|
|
6
|
+
def get_structured_tools(tools: list[LlmAppTool], api_key:str, callbacks:list, queue: Queue) -> list[StructuredTool]:
|
|
6
7
|
_structured_tools :list[StructuredTool] = []
|
|
7
8
|
for tool in [tool for tool in tools if tool.is_active]:
|
|
8
9
|
if _tool_config := ToolManager._list.get(tool.function_name):
|
|
9
|
-
_tool_instance = ToolManager(tool, api_key, callbacks)
|
|
10
|
+
_tool_instance = ToolManager(tool, api_key, callbacks, queue)
|
|
10
11
|
_structured_tool = StructuredTool.from_function(
|
|
11
12
|
coroutine=_tool_instance.get_coroutine(),
|
|
12
13
|
name=tool.function_id,
|
{ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/tools/tool_manager.py
RENAMED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
from asyncio import Queue
|
|
1
2
|
from typing import Optional, Type, Callable
|
|
2
3
|
from ws_bom_robot_app.llm.models.api import LlmAppTool
|
|
3
4
|
from ws_bom_robot_app.llm.utils.faiss_helper import FaissHelper
|
|
@@ -33,10 +34,12 @@ class ToolManager:
|
|
|
33
34
|
app_tool: LlmAppTool,
|
|
34
35
|
api_key: str,
|
|
35
36
|
callbacks: list,
|
|
37
|
+
queue: Optional[Queue] = None
|
|
36
38
|
):
|
|
37
39
|
self.app_tool = app_tool
|
|
38
40
|
self.api_key = api_key
|
|
39
41
|
self.callbacks = callbacks
|
|
42
|
+
self.queue = queue
|
|
40
43
|
|
|
41
44
|
|
|
42
45
|
#region functions
|
|
@@ -64,7 +67,8 @@ class ToolManager:
|
|
|
64
67
|
else:
|
|
65
68
|
search_type = "mixed"
|
|
66
69
|
search_kwargs = {"k": search_settings.search_k if search_settings.search_k else 4}
|
|
67
|
-
|
|
70
|
+
if self.queue:
|
|
71
|
+
await self.queue.put(getRandomWaitingMessage(self.app_tool.waiting_message, traduction=False))
|
|
68
72
|
return await FaissHelper.invoke(self.app_tool.vector_db, self.api_key, query, search_type, search_kwargs)
|
|
69
73
|
return []
|
|
70
74
|
#raise ValueError(f"Invalid configuration for {self.settings.name} tool of type {self.settings.type}. Must be a function or vector db not found.")
|
|
@@ -3,8 +3,8 @@ from langchain_openai import ChatOpenAI
|
|
|
3
3
|
from langchain_core.prompts import PromptTemplate
|
|
4
4
|
from ws_bom_robot_app.llm.utils.print import printString
|
|
5
5
|
|
|
6
|
-
def __print_output(data: str) ->
|
|
7
|
-
printString(data) if os.environ.get("AGENT_HANDLER_FORMATTED") == str(True) else
|
|
6
|
+
def __print_output(data: str) -> str:
|
|
7
|
+
return printString(data) if os.environ.get("AGENT_HANDLER_FORMATTED") == str(True) else f"{data} "
|
|
8
8
|
|
|
9
9
|
def getRandomWaitingMessage(waiting_messages: str, traduction: bool = True) -> str:
|
|
10
10
|
if not waiting_messages: return ""
|
|
@@ -12,13 +12,12 @@ def getRandomWaitingMessage(waiting_messages: str, traduction: bool = True) -> s
|
|
|
12
12
|
if not messages: return ""
|
|
13
13
|
chosen_message = random.choice(messages) + "\n"
|
|
14
14
|
if not traduction:
|
|
15
|
-
__print_output(chosen_message)
|
|
15
|
+
return __print_output(chosen_message)
|
|
16
16
|
return chosen_message
|
|
17
17
|
|
|
18
18
|
async def translate_text(api_key, language, text: str, callbacks: list) -> str:
|
|
19
19
|
if language == "it":
|
|
20
|
-
__print_output(text)
|
|
21
|
-
return
|
|
20
|
+
return __print_output(text)
|
|
22
21
|
llm = ChatOpenAI(api_key=api_key, model="gpt-3.5-turbo-0125", streaming=True)
|
|
23
22
|
sys_message = """Il tuo compito è di tradurre il testo_da_tradure nella seguente lingua: \n\n lingua: {language}\n\n testo_da_tradure: {testo_da_tradure} \n\nTraduci il testo_da_tradure nella lingua {language} senza aggiungere altro:"""
|
|
24
23
|
prompt = PromptTemplate.from_template(sys_message)
|
|
@@ -14,16 +14,16 @@ class HiddenPrints:
|
|
|
14
14
|
sys.stdout = self._original_stdout
|
|
15
15
|
sys.stderr = self._original_stderr
|
|
16
16
|
|
|
17
|
-
def printJson(data) ->
|
|
18
|
-
|
|
17
|
+
def printJson(data) -> str:
|
|
18
|
+
return f"{json.dumps(data, indent=2, sort_keys=True)},"
|
|
19
19
|
|
|
20
20
|
def printSingleJson(data) -> str:
|
|
21
|
-
|
|
21
|
+
return f"{json.dumps(data, indent=2, sort_keys=True)}"
|
|
22
22
|
|
|
23
|
-
def printString(data: str) ->
|
|
23
|
+
def printString(data: str) -> str:
|
|
24
24
|
if data != "":
|
|
25
|
-
printJson(data)
|
|
25
|
+
return printJson(data)
|
|
26
26
|
|
|
27
|
-
def printSingleString(data: str) ->
|
|
27
|
+
def printSingleString(data: str) -> str:
|
|
28
28
|
if data != "":
|
|
29
|
-
printSingleJson(data)
|
|
29
|
+
return printSingleJson(data)
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy, UnstructuredIngest
|
|
3
|
+
from unstructured_ingest.v2.processes.connectors.fsspec.azure import AzureConnectionConfig, AzureAccessConfig, AzureDownloaderConfig, AzureIndexerConfig
|
|
4
|
+
from langchain_core.documents import Document
|
|
5
|
+
from ws_bom_robot_app.llm.vector_store.loader.base import Loader
|
|
6
|
+
from typing import Union, Optional
|
|
7
|
+
from pydantic import BaseModel, Field, AliasChoices
|
|
8
|
+
class AzureParams(BaseModel):
|
|
9
|
+
"""
|
|
10
|
+
AzureParams is a model that holds configuration parameters for connecting to Azure services.
|
|
11
|
+
|
|
12
|
+
Attributes:
|
|
13
|
+
remote_url (str): The URL of the remote Azure service, in the form az://<container> or az://<container>/<path> for sub-folders.
|
|
14
|
+
account_name (str): The name of the Azure storage account.
|
|
15
|
+
\nProvide one of the following:
|
|
16
|
+
- account_key (Optional[str]): The key for the Azure storage account. Default is None.
|
|
17
|
+
- connection_string (Optional[str]): The connection string for the Azure storage account. Default is None.
|
|
18
|
+
- sas_token (Optional[str]): The Shared Access Signature token for the Azure storage account. Default is None. Detail: https://learn.microsoft.com/en-us/azure/ai-services/translator/document-translation/how-to-guides/create-sas-tokens?tabs=Containers
|
|
19
|
+
recursive (bool): Indicates whether the operation should be recursive. Default is False.
|
|
20
|
+
extension (list[str]): A list of file extensions to filter the files. Default is None.
|
|
21
|
+
"""
|
|
22
|
+
remote_url: str = Field(validation_alias=AliasChoices("remoteUrl","remote_url"))
|
|
23
|
+
account_name: str = Field(validation_alias=AliasChoices("accountName","account_name"))
|
|
24
|
+
account_key: Optional[str] = Field(default=None,validation_alias=AliasChoices("accountKey","account_key"))
|
|
25
|
+
connection_string: Optional[str] = Field(default=None,validation_alias=AliasChoices("connectionString","connection_string"))
|
|
26
|
+
sas_token: Optional[str] = Field(default=None,validation_alias=AliasChoices("sasToken","sas_token"))
|
|
27
|
+
recursive: bool = False
|
|
28
|
+
extension: list[str] = Field(default=None)
|
|
29
|
+
class Azure(IntegrationStrategy):
|
|
30
|
+
def __init__(self, knowledgebase_path: str, data: dict[str, Union[str,int,list]]):
|
|
31
|
+
super().__init__(knowledgebase_path, data)
|
|
32
|
+
self.__data = AzureParams.model_validate(self.data)
|
|
33
|
+
self.__unstructured_ingest = UnstructuredIngest(self.working_directory)
|
|
34
|
+
def working_subdirectory(self) -> str:
|
|
35
|
+
return 'azure'
|
|
36
|
+
def run(self) -> None:
|
|
37
|
+
indexer_config = AzureIndexerConfig(
|
|
38
|
+
remote_url=self.__data.remote_url,
|
|
39
|
+
recursive=self.__data.recursive,
|
|
40
|
+
#sample_n_files=1
|
|
41
|
+
)
|
|
42
|
+
downloader_config = AzureDownloaderConfig(
|
|
43
|
+
download_dir=self.working_directory
|
|
44
|
+
)
|
|
45
|
+
connection_config = AzureConnectionConfig(
|
|
46
|
+
access_config=AzureAccessConfig(
|
|
47
|
+
account_name=self.__data.account_name,
|
|
48
|
+
account_key=self.__data.account_key,
|
|
49
|
+
connection_string=self.__data.connection_string,
|
|
50
|
+
sas_token=self.__data.sas_token
|
|
51
|
+
)
|
|
52
|
+
)
|
|
53
|
+
self.__unstructured_ingest.pipeline(
|
|
54
|
+
indexer_config,
|
|
55
|
+
downloader_config,
|
|
56
|
+
connection_config,
|
|
57
|
+
extension=self.__data.extension).run()
|
|
58
|
+
async def load(self) -> list[Document]:
|
|
59
|
+
await asyncio.to_thread(self.run)
|
|
60
|
+
await asyncio.sleep(1)
|
|
61
|
+
return await Loader(self.working_directory).load()
|
|
62
|
+
|
|
@@ -10,7 +10,7 @@ class IntegrationStrategy(ABC):
|
|
|
10
10
|
self.knowledgebase_path = knowledgebase_path
|
|
11
11
|
self.data = data
|
|
12
12
|
self.working_directory = os.path.join(self.knowledgebase_path,self.working_subdirectory())
|
|
13
|
-
os.makedirs(self.working_directory,
|
|
13
|
+
os.makedirs(self.working_directory, exist_ok=True)
|
|
14
14
|
@property
|
|
15
15
|
@abstractmethod
|
|
16
16
|
def working_subdirectory(self) -> str:
|
|
@@ -23,7 +23,7 @@ class IntegrationStrategy(ABC):
|
|
|
23
23
|
class UnstructuredIngest():
|
|
24
24
|
def __init__(self, working_directory: str):
|
|
25
25
|
self.working_directory = working_directory
|
|
26
|
-
def pipeline(self,indexer,downloader,connection) -> Pipeline:
|
|
26
|
+
def pipeline(self,indexer,downloader,connection,extension: list[str] = None) -> Pipeline:
|
|
27
27
|
return Pipeline.from_configs(
|
|
28
28
|
context=ProcessorConfig(
|
|
29
29
|
reprocess=False,
|
|
@@ -38,6 +38,6 @@ class UnstructuredIngest():
|
|
|
38
38
|
downloader_config=downloader,
|
|
39
39
|
source_connection_config=connection,
|
|
40
40
|
partitioner_config=PartitionerConfig(),
|
|
41
|
-
filterer_config=FiltererConfig()
|
|
41
|
+
filterer_config=FiltererConfig(file_glob=[f"**/*{ext}" for ext in extension] if extension else None)
|
|
42
42
|
)
|
|
43
43
|
|
|
@@ -7,10 +7,21 @@ from typing import Union
|
|
|
7
7
|
from pydantic import BaseModel, Field, AliasChoices
|
|
8
8
|
|
|
9
9
|
class ConfluenceParams(BaseModel):
|
|
10
|
+
"""
|
|
11
|
+
ConfluenceParams is a data model for storing Confluence integration parameters.
|
|
12
|
+
|
|
13
|
+
Attributes:
|
|
14
|
+
url (str): The URL of the Confluence instance, e.g., 'https://example.atlassian.net'.
|
|
15
|
+
access_token (str): The access token for authenticating with Confluence, e.g., 'AT....'
|
|
16
|
+
user_email (str): The email address of the Confluence user
|
|
17
|
+
spaces (list[str]): A list of Confluence spaces to interact with, e.g., ['SPACE1', 'SPACE2'].
|
|
18
|
+
extension (list[str], optional): A list of file extensions to filter by. Defaults to None, e.g., ['.pdf', '.docx'].
|
|
19
|
+
"""
|
|
10
20
|
url: str
|
|
11
21
|
access_token: str = Field(validation_alias=AliasChoices("accessToken","access_token"))
|
|
12
22
|
user_email: str = Field(validation_alias=AliasChoices("userEmail","user_email"))
|
|
13
23
|
spaces: list[str] = []
|
|
24
|
+
extension: list[str] = Field(default=None)
|
|
14
25
|
class Confluence(IntegrationStrategy):
|
|
15
26
|
def __init__(self, knowledgebase_path: str, data: dict[str, Union[str,int,list]]):
|
|
16
27
|
super().__init__(knowledgebase_path, data)
|
|
@@ -33,7 +44,8 @@ class Confluence(IntegrationStrategy):
|
|
|
33
44
|
self.__unstructured_ingest.pipeline(
|
|
34
45
|
indexer_config,
|
|
35
46
|
downloader_config,
|
|
36
|
-
connection_config
|
|
47
|
+
connection_config,
|
|
48
|
+
extension=self.__data.extension).run()
|
|
37
49
|
async def load(self) -> list[Document]:
|
|
38
50
|
await asyncio.to_thread(self.run)
|
|
39
51
|
await asyncio.sleep(1)
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy, UnstructuredIngest
|
|
3
|
+
from unstructured_ingest.v2.processes.connectors.fsspec.dropbox import DropboxConnectionConfig, DropboxAccessConfig, DropboxDownloaderConfig, DropboxIndexerConfig
|
|
4
|
+
from langchain_core.documents import Document
|
|
5
|
+
from ws_bom_robot_app.llm.vector_store.loader.base import Loader
|
|
6
|
+
from typing import Union
|
|
7
|
+
from pydantic import BaseModel, Field, AliasChoices
|
|
8
|
+
class DropboxParams(BaseModel):
|
|
9
|
+
"""
|
|
10
|
+
DropboxParams is a model for storing parameters required to interact with Dropbox.
|
|
11
|
+
|
|
12
|
+
Attributes:
|
|
13
|
+
remote_url (str): The URL of the remote Dropbox location, e.g. 'dropbox://demo-directory' or 'dropbox://demo-directory/sub-directory'.
|
|
14
|
+
token (str): The authentication token for accessing Dropbox.
|
|
15
|
+
create app: https://www.dropbox.com/developers, with file.content.read permission, and generate token.
|
|
16
|
+
recursive (bool, optional): A flag indicating whether to search directories recursively. Defaults to False.
|
|
17
|
+
extension (list[str], optional): A list of file extensions to filter by. Defaults to None, e.g. ['.pdf', '.docx'].
|
|
18
|
+
"""
|
|
19
|
+
remote_url: str = Field(validation_alias=AliasChoices("remoteUrl","remote_url"))
|
|
20
|
+
token: str
|
|
21
|
+
recursive: bool = False
|
|
22
|
+
extension: list[str] = Field(default=None)
|
|
23
|
+
class Dropbox(IntegrationStrategy):
|
|
24
|
+
def __init__(self, knowledgebase_path: str, data: dict[str, Union[str,int,list]]):
|
|
25
|
+
super().__init__(knowledgebase_path, data)
|
|
26
|
+
self.__data = DropboxParams.model_validate(self.data)
|
|
27
|
+
self.__unstructured_ingest = UnstructuredIngest(self.working_directory)
|
|
28
|
+
def working_subdirectory(self) -> str:
|
|
29
|
+
return 'dropbox'
|
|
30
|
+
def run(self) -> None:
|
|
31
|
+
indexer_config = DropboxIndexerConfig(
|
|
32
|
+
remote_url=self.__data.remote_url,
|
|
33
|
+
recursive=self.__data.recursive,
|
|
34
|
+
#sample_n_files=1
|
|
35
|
+
)
|
|
36
|
+
downloader_config = DropboxDownloaderConfig(
|
|
37
|
+
download_dir=self.working_directory
|
|
38
|
+
)
|
|
39
|
+
connection_config = DropboxConnectionConfig(
|
|
40
|
+
access_config=DropboxAccessConfig(
|
|
41
|
+
token=self.__data.token
|
|
42
|
+
)
|
|
43
|
+
)
|
|
44
|
+
self.__unstructured_ingest.pipeline(
|
|
45
|
+
indexer_config,
|
|
46
|
+
downloader_config,
|
|
47
|
+
connection_config,
|
|
48
|
+
extension=self.__data.extension).run()
|
|
49
|
+
async def load(self) -> list[Document]:
|
|
50
|
+
await asyncio.to_thread(self.run)
|
|
51
|
+
await asyncio.sleep(1)
|
|
52
|
+
return await Loader(self.working_directory).load()
|
|
53
|
+
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy, UnstructuredIngest
|
|
3
|
+
from unstructured_ingest.v2.processes.connectors.fsspec.gcs import GcsIndexerConfig, GcsConnectionConfig, GcsAccessConfig, GcsDownloaderConfig
|
|
4
|
+
from langchain_core.documents import Document
|
|
5
|
+
from ws_bom_robot_app.llm.vector_store.loader.base import Loader
|
|
6
|
+
from typing import Union, Optional
|
|
7
|
+
from pydantic import BaseModel, Field, AliasChoices
|
|
8
|
+
class GcsParams(BaseModel):
|
|
9
|
+
"""
|
|
10
|
+
GcsParams is a model that defines the parameters required for Google Cloud Storage (GCS) integration.
|
|
11
|
+
Documentation:
|
|
12
|
+
- create service account: https://cloud.google.com/iam/docs/service-accounts-create?hl=en#console
|
|
13
|
+
- create key: https://cloud.google.com/iam/docs/keys-create-delete?hl=en#creating
|
|
14
|
+
- export key in a single line\n
|
|
15
|
+
```pwsh
|
|
16
|
+
(Get-Content -Path "<path-to-downloaded-key-file>" -Raw).Replace("`r`n", "").Replace("`n", "")
|
|
17
|
+
```
|
|
18
|
+
- create bucket with 'Storage Object Viewer' permission: https://cloud.google.com/storage/docs/creating-buckets?hl=en#console
|
|
19
|
+
- add principal to bucket: https://cloud.google.com/storage/docs/access-control/using-iam-permissions?hl=en#console
|
|
20
|
+
- manage IAM policies: https://cloud.google.com/storage/docs/access-control/using-iam-permissions?hl=en
|
|
21
|
+
|
|
22
|
+
Attributes:
|
|
23
|
+
remote_url (str): The URL of the remote GCS bucket, e.g. 'gcs://demo-bucket' or 'gcs://demo-bucket/sub-directory'.
|
|
24
|
+
service_account_key (str): The service account key for accessing the GCS bucket.
|
|
25
|
+
recursive (bool): A flag indicating whether to recursively access the GCS bucket. Defaults to False.
|
|
26
|
+
extension (list[str]): A list of file extensions to filter the files in the GCS bucket. Defaults to None.
|
|
27
|
+
"""
|
|
28
|
+
remote_url: str = Field(validation_alias=AliasChoices("remoteUrl","remote_url"))
|
|
29
|
+
service_account_key: str = Field(validation_alias=AliasChoices("serviceAccountKey","service_account_key"))
|
|
30
|
+
recursive: bool = False
|
|
31
|
+
extension: list[str] = Field(default=None)
|
|
32
|
+
class Gcs(IntegrationStrategy):
|
|
33
|
+
def __init__(self, knowledgebase_path: str, data: dict[str, Union[str,int,list]]):
|
|
34
|
+
super().__init__(knowledgebase_path, data)
|
|
35
|
+
self.__data = GcsParams.model_validate(self.data)
|
|
36
|
+
self.__unstructured_ingest = UnstructuredIngest(self.working_directory)
|
|
37
|
+
def working_subdirectory(self) -> str:
|
|
38
|
+
return 'gcs'
|
|
39
|
+
def run(self) -> None:
|
|
40
|
+
indexer_config = GcsIndexerConfig(
|
|
41
|
+
remote_url=self.__data.remote_url,
|
|
42
|
+
recursive=self.__data.recursive,
|
|
43
|
+
#sample_n_files=1
|
|
44
|
+
)
|
|
45
|
+
downloader_config = GcsDownloaderConfig(
|
|
46
|
+
download_dir=self.working_directory
|
|
47
|
+
)
|
|
48
|
+
connection_config = GcsConnectionConfig(
|
|
49
|
+
access_config=GcsAccessConfig(
|
|
50
|
+
service_account_key=self.__data.service_account_key
|
|
51
|
+
)
|
|
52
|
+
)
|
|
53
|
+
self.__unstructured_ingest.pipeline(
|
|
54
|
+
indexer_config,
|
|
55
|
+
downloader_config,
|
|
56
|
+
connection_config,
|
|
57
|
+
extension=self.__data.extension).run()
|
|
58
|
+
async def load(self) -> list[Document]:
|
|
59
|
+
await asyncio.to_thread(self.run)
|
|
60
|
+
await asyncio.sleep(1)
|
|
61
|
+
return await Loader(self.working_directory).load()
|
|
62
|
+
|
|
@@ -10,6 +10,15 @@ from ws_bom_robot_app.llm.vector_store.loader.base import Loader
|
|
|
10
10
|
from pydantic import BaseModel, Field, AliasChoices
|
|
11
11
|
|
|
12
12
|
class GithubParams(BaseModel):
|
|
13
|
+
"""
|
|
14
|
+
GithubParams is a model for storing parameters required to interact with a GitHub repository.
|
|
15
|
+
|
|
16
|
+
Attributes:
|
|
17
|
+
repo (str): The name of the GitHub repository, e.g., 'companyname/reponame'
|
|
18
|
+
access_token (Optional[str]): The access token for authenticating with GitHub, e.g., 'ghp_1234567890'.
|
|
19
|
+
branch (Optional[str]): The branch of the repository to interact with. Defaults to 'main'.
|
|
20
|
+
file_ext (Optional[list[str]]): A list of file extensions to filter by, e.g. ['.md', '.pdf']. Defaults to an empty list.
|
|
21
|
+
"""
|
|
13
22
|
repo: str
|
|
14
23
|
access_token: Optional[str] | None = Field(None,validation_alias=AliasChoices("accessToken","access_token"))
|
|
15
24
|
branch: Optional[str] = 'main'
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy, UnstructuredIngest
|
|
3
|
+
from unstructured_ingest.v2.processes.connectors.google_drive import GoogleDriveConnectionConfig, GoogleDriveDownloaderConfig, GoogleDriveIndexerConfig, GoogleDriveAccessConfig
|
|
4
|
+
from langchain_core.documents import Document
|
|
5
|
+
from ws_bom_robot_app.llm.vector_store.loader.base import Loader
|
|
6
|
+
from typing import Union
|
|
7
|
+
from pydantic import BaseModel, Field, AliasChoices
|
|
8
|
+
class GoogleDriveParams(BaseModel):
|
|
9
|
+
"""
|
|
10
|
+
GoogleDriveParams is a model that holds parameters for Google Drive integration.
|
|
11
|
+
|
|
12
|
+
Attributes:
|
|
13
|
+
service_account_key (dict): The service account key for Google Drive API authentication \n
|
|
14
|
+
- detail: https://developers.google.com/workspace/guides/create-credentials#service-account \n
|
|
15
|
+
- create a service account key, download the JSON file, and pass the content of the JSON file as a dictionary \n
|
|
16
|
+
- e.g., {
|
|
17
|
+
"type": "service_account",
|
|
18
|
+
"project_id": "demo-project-123456",
|
|
19
|
+
"private_key_id": "**********",
|
|
20
|
+
"private_key": "-----BEGIN PRIVATE KEY-----...----END PRIVATE KEY-----",
|
|
21
|
+
"client_email": "demo-client@demo-project-123456.iam.gserviceaccount.com",
|
|
22
|
+
"client_id": "123456",
|
|
23
|
+
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
|
|
24
|
+
"token_uri": "https://oauth2.googleapis.com/token",
|
|
25
|
+
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
|
|
26
|
+
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/demo-client%40demo-project-123456.iam.gserviceaccount.com",
|
|
27
|
+
"universe_domain": "googleapis.com"
|
|
28
|
+
}
|
|
29
|
+
- enable Google Drive API: https://console.cloud.google.com/marketplace/product/google/drive.googleapis.com
|
|
30
|
+
- copy email address of the service account and share the Google Drive with the email address: https://www.youtube.com/watch?v=ykJQzEe_2dM&t=2s
|
|
31
|
+
|
|
32
|
+
drive_id (str): The {folder_id} of the Google Drive to interact with, e.g., https://drive.google.com/drive/folders/{folder_id}
|
|
33
|
+
extensions (list[str]): A list of file extensions to filter the files in the Google Drive, e.g., ['.pdf', '.docx'].
|
|
34
|
+
recursive (bool): A flag indicating whether to search files recursively in the Google Drive.
|
|
35
|
+
"""
|
|
36
|
+
service_account_key: dict = Field(validation_alias=AliasChoices("serviceAccountKey","service_account_key"))
|
|
37
|
+
drive_id: str = Field(validation_alias=AliasChoices("driveId","drive_id"))
|
|
38
|
+
extensions: list[str] = []
|
|
39
|
+
recursive: bool = False
|
|
40
|
+
class GoogleDrive(IntegrationStrategy):
|
|
41
|
+
def __init__(self, knowledgebase_path: str, data: dict[str, Union[str,int,list]]):
|
|
42
|
+
super().__init__(knowledgebase_path, data)
|
|
43
|
+
self.__data = GoogleDriveParams.model_validate(self.data)
|
|
44
|
+
self.__unstructured_ingest = UnstructuredIngest(self.working_directory)
|
|
45
|
+
def working_subdirectory(self) -> str:
|
|
46
|
+
return 'googledrive'
|
|
47
|
+
def run(self) -> None:
|
|
48
|
+
indexer_config = GoogleDriveIndexerConfig(
|
|
49
|
+
extensions=self.__data.extensions,
|
|
50
|
+
recursive=self.__data.recursive
|
|
51
|
+
)
|
|
52
|
+
downloader_config = GoogleDriveDownloaderConfig(
|
|
53
|
+
download_dir=self.working_directory
|
|
54
|
+
)
|
|
55
|
+
connection_config = GoogleDriveConnectionConfig(
|
|
56
|
+
access_config=GoogleDriveAccessConfig(
|
|
57
|
+
service_account_key=self.__data.service_account_key
|
|
58
|
+
),
|
|
59
|
+
drive_id=self.__data.drive_id
|
|
60
|
+
)
|
|
61
|
+
self.__unstructured_ingest.pipeline(
|
|
62
|
+
indexer_config,
|
|
63
|
+
downloader_config,
|
|
64
|
+
connection_config).run()
|
|
65
|
+
async def load(self) -> list[Document]:
|
|
66
|
+
await asyncio.to_thread(self.run)
|
|
67
|
+
await asyncio.sleep(1)
|
|
68
|
+
return await Loader(self.working_directory).load()
|
|
69
|
+
|
|
@@ -9,6 +9,17 @@ from pydantic import BaseModel, Field, AliasChoices
|
|
|
9
9
|
from typing import Optional, Union
|
|
10
10
|
|
|
11
11
|
class JiraParams(BaseModel):
|
|
12
|
+
"""
|
|
13
|
+
JiraParams is a Pydantic model that represents the parameters required to interact with a Jira instance.
|
|
14
|
+
|
|
15
|
+
Attributes:
|
|
16
|
+
url (str): The URL of the Jira instance, e.g., 'https://example.atlassian.net'.
|
|
17
|
+
access_token (str): The access token for authenticating with the Jira API.
|
|
18
|
+
user_email (str): The email address of the Jira user.
|
|
19
|
+
projects (list[str]): A list of project keys or IDs to interact with, e.g., ['SCRUM', 'PROJ1'].
|
|
20
|
+
boards (Optional[list[str]]): An optional list of board IDs to interact with, e.g., ['1', '2']. Defaults to None.
|
|
21
|
+
issues (Optional[list[str]]): An optional list of issue keys or IDs to interact with, e.g., ['SCRUM-1', 'PROJ1-1']. Defaults to None.
|
|
22
|
+
"""
|
|
12
23
|
url: str
|
|
13
24
|
access_token: str = Field(validation_alias=AliasChoices("accessToken","access_token"))
|
|
14
25
|
user_email: str = Field(validation_alias=AliasChoices("userEmail","user_email"))
|
|
@@ -1,16 +1,31 @@
|
|
|
1
1
|
from typing import Type
|
|
2
|
+
from ws_bom_robot_app.llm.vector_store.integration.azure import Azure
|
|
2
3
|
from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy
|
|
3
4
|
from ws_bom_robot_app.llm.vector_store.integration.confluence import Confluence
|
|
5
|
+
from ws_bom_robot_app.llm.vector_store.integration.dropbox import Dropbox
|
|
6
|
+
from ws_bom_robot_app.llm.vector_store.integration.gcs import Gcs
|
|
4
7
|
from ws_bom_robot_app.llm.vector_store.integration.github import Github
|
|
8
|
+
from ws_bom_robot_app.llm.vector_store.integration.googledrive import GoogleDrive
|
|
5
9
|
from ws_bom_robot_app.llm.vector_store.integration.jira import Jira
|
|
10
|
+
from ws_bom_robot_app.llm.vector_store.integration.s3 import S3
|
|
11
|
+
from ws_bom_robot_app.llm.vector_store.integration.sftp import Sftp
|
|
6
12
|
from ws_bom_robot_app.llm.vector_store.integration.sitemap import Sitemap
|
|
13
|
+
from ws_bom_robot_app.llm.vector_store.integration.slack import Slack
|
|
7
14
|
|
|
8
15
|
class IntegrationManager:
|
|
9
16
|
_list: dict[str, Type[IntegrationStrategy]] = {
|
|
10
|
-
"
|
|
17
|
+
"llmkbazure": Azure,
|
|
18
|
+
"llmkbconfluence": Confluence,
|
|
19
|
+
"llmkbdropbox": Dropbox,
|
|
11
20
|
"llmkbgithub": Github,
|
|
21
|
+
"llmkbgcs": Gcs,
|
|
22
|
+
"llmkbgoogledrive": GoogleDrive,
|
|
12
23
|
"llmkbjira": Jira,
|
|
13
|
-
"
|
|
24
|
+
"llmkbs3": S3,
|
|
25
|
+
"llmkbsftp": Sftp,
|
|
26
|
+
"llmkbsitemap": Sitemap,
|
|
27
|
+
"llmkbslack": Slack,
|
|
28
|
+
|
|
14
29
|
}
|
|
15
30
|
@classmethod
|
|
16
31
|
def get_strategy(cls, name: str, knowledgebase_path: str, data: dict[str, str]) -> IntegrationStrategy:
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy, UnstructuredIngest
|
|
3
|
+
from unstructured_ingest.v2.processes.connectors.fsspec.s3 import S3ConnectionConfig, S3AccessConfig, S3DownloaderConfig, S3IndexerConfig
|
|
4
|
+
from langchain_core.documents import Document
|
|
5
|
+
from ws_bom_robot_app.llm.vector_store.loader.base import Loader
|
|
6
|
+
from typing import Union, Optional
|
|
7
|
+
from pydantic import BaseModel, Field, AliasChoices
|
|
8
|
+
class S3Params(BaseModel):
|
|
9
|
+
"""
|
|
10
|
+
S3Params is a data model for storing parameters required to interact with an S3 bucket.
|
|
11
|
+
Documentation:
|
|
12
|
+
- create S3 bucket: https://docs.aws.amazon.com/AmazonS3/latest/userguide/GetStartedWithS3.html#creating-bucket
|
|
13
|
+
- enable authenticated bucket access: https://docs.aws.amazon.com/AmazonS3/latest/userguide/walkthrough1.html
|
|
14
|
+
- set policies s3:ListBucket and s3:GetObject: https://docs.aws.amazon.com/AmazonS3/latest/userguide/example-policies-s3.html
|
|
15
|
+
- generate key/secret: https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html#Using_CreateAccessKey
|
|
16
|
+
- optionally create STS token: https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_temp_request.html#api_getsessiontoken
|
|
17
|
+
|
|
18
|
+
Attributes:
|
|
19
|
+
remote_url (str): The URL of the remote S3 bucket, e.g., 's3://demo-bucket' or 's3://demo-bucket/sub-directory'.
|
|
20
|
+
key (str): The AWS access key ID for the authenticated AWS IAM user, e.g., 'AKIAIOSFODNN7EXAMPLE'.
|
|
21
|
+
secret (str): The corresponding AWS secret access key, e.g., 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY'.
|
|
22
|
+
token (Optional[str]): If required, the AWS STS session token for temporary access. Default is None.
|
|
23
|
+
recursive (bool): A flag indicating whether to perform operations recursively. Default is False.
|
|
24
|
+
extension (list[str]): A list of file extensions to filter the files, e.g., ['.pdf', '.docx']. Default is None.
|
|
25
|
+
"""
|
|
26
|
+
remote_url: str = Field(validation_alias=AliasChoices("remoteUrl","remote_url"))
|
|
27
|
+
key: str
|
|
28
|
+
secret: str
|
|
29
|
+
token: Optional[str] = None
|
|
30
|
+
recursive: bool = False
|
|
31
|
+
extension: list[str] = Field(default=None)
|
|
32
|
+
class S3(IntegrationStrategy):
|
|
33
|
+
def __init__(self, knowledgebase_path: str, data: dict[str, Union[str,int,list]]):
|
|
34
|
+
super().__init__(knowledgebase_path, data)
|
|
35
|
+
self.__data = S3Params.model_validate(self.data)
|
|
36
|
+
self.__unstructured_ingest = UnstructuredIngest(self.working_directory)
|
|
37
|
+
def working_subdirectory(self) -> str:
|
|
38
|
+
return 's3'
|
|
39
|
+
def run(self) -> None:
|
|
40
|
+
indexer_config = S3IndexerConfig(
|
|
41
|
+
remote_url=self.__data.remote_url,
|
|
42
|
+
recursive=self.__data.recursive,
|
|
43
|
+
#sample_n_files=1
|
|
44
|
+
)
|
|
45
|
+
downloader_config = S3DownloaderConfig(
|
|
46
|
+
download_dir=self.working_directory
|
|
47
|
+
)
|
|
48
|
+
connection_config = S3ConnectionConfig(
|
|
49
|
+
access_config=S3AccessConfig(
|
|
50
|
+
key=self.__data.key,
|
|
51
|
+
secret=self.__data.secret,
|
|
52
|
+
token=self.__data.token
|
|
53
|
+
)
|
|
54
|
+
)
|
|
55
|
+
self.__unstructured_ingest.pipeline(
|
|
56
|
+
indexer_config,
|
|
57
|
+
downloader_config,
|
|
58
|
+
connection_config,
|
|
59
|
+
extension=self.__data.extension).run()
|
|
60
|
+
async def load(self) -> list[Document]:
|
|
61
|
+
await asyncio.to_thread(self.run)
|
|
62
|
+
await asyncio.sleep(1)
|
|
63
|
+
return await Loader(self.working_directory).load()
|
|
64
|
+
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy, UnstructuredIngest
|
|
3
|
+
from unstructured_ingest.v2.processes.connectors.fsspec.sftp import SftpConnectionConfig, SftpAccessConfig, SftpDownloaderConfig, SftpIndexerConfig
|
|
4
|
+
from langchain_core.documents import Document
|
|
5
|
+
from ws_bom_robot_app.llm.vector_store.loader.base import Loader
|
|
6
|
+
from typing import Union, Optional
|
|
7
|
+
from pydantic import BaseModel, Field, AliasChoices
|
|
8
|
+
class SftpParams(BaseModel):
|
|
9
|
+
"""
|
|
10
|
+
SftpParams is a model that defines the parameters required for SFTP integration.
|
|
11
|
+
|
|
12
|
+
Attributes:
|
|
13
|
+
remote_url (str): The URL of the remote SFTP server, e.g. 'sftp://example.com' or 'sftp://example.com/directory'.
|
|
14
|
+
host (Optional[str]): The hostname or IP address of the SFTP server. Defaults to None, in which case it is inferred from remote_url.
|
|
15
|
+
port (Optional[int]): The port number to connect to on the SFTP server. Defaults to 22.
|
|
16
|
+
username (str): The username to authenticate with the SFTP server.
|
|
17
|
+
password (str): The password to authenticate with the SFTP server.
|
|
18
|
+
recursive (bool): Whether to perform recursive operations. Defaults to False.
|
|
19
|
+
extension (list[str]): A list of file extensions to filter by, e.g. ['.pdf', '.docx']. Defaults to None.
|
|
20
|
+
"""
|
|
21
|
+
remote_url: str = Field(validation_alias=AliasChoices("remoteUrl","remote_url"))
|
|
22
|
+
host: Optional[str] = None
|
|
23
|
+
port: Optional[int] = 22
|
|
24
|
+
username: str
|
|
25
|
+
password: str
|
|
26
|
+
recursive: bool = False
|
|
27
|
+
extension: list[str] = Field(default=None)
|
|
28
|
+
class Sftp(IntegrationStrategy):
|
|
29
|
+
def __init__(self, knowledgebase_path: str, data: dict[str, Union[str,int,list]]):
|
|
30
|
+
super().__init__(knowledgebase_path, data)
|
|
31
|
+
self.__data = SftpParams.model_validate(self.data)
|
|
32
|
+
self.__unstructured_ingest = UnstructuredIngest(self.working_directory)
|
|
33
|
+
def working_subdirectory(self) -> str:
|
|
34
|
+
return 'sftp'
|
|
35
|
+
def run(self) -> None:
|
|
36
|
+
indexer_config = SftpIndexerConfig(
|
|
37
|
+
remote_url=self.__data.remote_url,
|
|
38
|
+
recursive=self.__data.recursive,
|
|
39
|
+
#sample_n_files=1
|
|
40
|
+
)
|
|
41
|
+
downloader_config = SftpDownloaderConfig(
|
|
42
|
+
download_dir=self.working_directory,
|
|
43
|
+
remote_url=self.__data.remote_url
|
|
44
|
+
)
|
|
45
|
+
connection_config = SftpConnectionConfig(
|
|
46
|
+
access_config=SftpAccessConfig(
|
|
47
|
+
password=self.__data.password
|
|
48
|
+
),
|
|
49
|
+
username=self.__data.username,
|
|
50
|
+
host=self.__data.host,
|
|
51
|
+
port=self.__data.port,
|
|
52
|
+
look_for_keys=False,
|
|
53
|
+
allow_agent=False
|
|
54
|
+
)
|
|
55
|
+
self.__unstructured_ingest.pipeline(
|
|
56
|
+
indexer_config,
|
|
57
|
+
downloader_config,
|
|
58
|
+
connection_config,
|
|
59
|
+
extension=self.__data.extension).run()
|
|
60
|
+
async def load(self) -> list[Document]:
|
|
61
|
+
await asyncio.to_thread(self.run)
|
|
62
|
+
await asyncio.sleep(1)
|
|
63
|
+
return await Loader(self.working_directory).load()
|
|
64
|
+
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy, UnstructuredIngest
|
|
3
|
+
from unstructured_ingest.v2.processes.connectors.sharepoint import SharepointIndexerConfig, SharepointDownloaderConfig, SharepointConnectionConfig, SharepointAccessConfig
|
|
4
|
+
from langchain_core.documents import Document
|
|
5
|
+
from ws_bom_robot_app.llm.vector_store.loader.base import Loader
|
|
6
|
+
from typing import Union, Optional
|
|
7
|
+
from pydantic import BaseModel, Field, AliasChoices
|
|
8
|
+
|
|
9
|
+
class SharepointParams(BaseModel):
|
|
10
|
+
client_id : str = Field(validation_alias=AliasChoices("clientId","client_id"))
|
|
11
|
+
client_secret : str = Field(validation_alias=AliasChoices("clientSecret","client_secret"))
|
|
12
|
+
site_url: str = Field(validation_alias=AliasChoices("siteUrl","site_url"))
|
|
13
|
+
site_path: str = Field(default=None,validation_alias=AliasChoices("sitePath","site_path"))
|
|
14
|
+
recursive: bool = Field(default=False)
|
|
15
|
+
omit_files: bool = Field(default=False, validation_alias=AliasChoices("omitFiles","omit_files"))  # no trailing comma: it would make the default a 1-tuple instead of a FieldInfo
|
|
16
|
+
omit_pages: bool = Field(default=False, validation_alias=AliasChoices("omitPages","omit_pages"))  # no trailing comma: it would make the default a 1-tuple instead of a FieldInfo
|
|
17
|
+
omit_lists: bool = Field(default=False, validation_alias=AliasChoices("omitLists","omit_lists"))  # no trailing comma: it would make the default a 1-tuple instead of a FieldInfo
|
|
18
|
+
extension: list[str] = Field(default=None)
|
|
19
|
+
class Sharepoint(IntegrationStrategy):
|
|
20
|
+
def __init__(self, knowledgebase_path: str, data: dict[str, Union[str,int,list]]):
|
|
21
|
+
super().__init__(knowledgebase_path, data)
|
|
22
|
+
self.__data = SharepointParams.model_validate(self.data)
|
|
23
|
+
self.__unstructured_ingest = UnstructuredIngest(self.working_directory)
|
|
24
|
+
def working_subdirectory(self) -> str:
|
|
25
|
+
return 'sharepoint'
|
|
26
|
+
def run(self) -> None:
|
|
27
|
+
indexer_config = SharepointIndexerConfig(
|
|
28
|
+
path=self.__data.site_path,
|
|
29
|
+
recursive=self.__data.recursive,
|
|
30
|
+
omit_files=self.__data.omit_files,
|
|
31
|
+
omit_pages=self.__data.omit_pages,
|
|
32
|
+
omit_lists=self.__data.omit_lists
|
|
33
|
+
)
|
|
34
|
+
downloader_config = SharepointDownloaderConfig(
|
|
35
|
+
download_dir=self.working_directory
|
|
36
|
+
)
|
|
37
|
+
connection_config = SharepointConnectionConfig(
|
|
38
|
+
access_config=SharepointAccessConfig(client_cred=self.__data.client_secret),
|
|
39
|
+
client_id=self.__data.client_id,
|
|
40
|
+
site=self.__data.site_url,
|
|
41
|
+
permissions_config=None
|
|
42
|
+
)
|
|
43
|
+
self.__unstructured_ingest.pipeline(
|
|
44
|
+
indexer_config,
|
|
45
|
+
downloader_config,
|
|
46
|
+
connection_config,
|
|
47
|
+
extension=self.__data.extension).run()
|
|
48
|
+
async def load(self) -> list[Document]:
|
|
49
|
+
await asyncio.to_thread(self.run)
|
|
50
|
+
await asyncio.sleep(1)
|
|
51
|
+
return await Loader(self.working_directory).load()
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy, UnstructuredIngest
|
|
3
|
+
from unstructured_ingest.v2.processes.connectors.slack import SlackIndexerConfig, SlackDownloaderConfig, SlackConnectionConfig, SlackAccessConfig
|
|
4
|
+
from langchain_core.documents import Document
|
|
5
|
+
from ws_bom_robot_app.llm.vector_store.loader.base import Loader
|
|
6
|
+
from typing import Union
|
|
7
|
+
from pydantic import BaseModel, Field, AliasChoices
|
|
8
|
+
from datetime import datetime, timedelta
|
|
9
|
+
|
|
10
|
+
class SlackParams(BaseModel):
|
|
11
|
+
"""
|
|
12
|
+
SlackParams is a data model for storing Slack integration parameters.
|
|
13
|
+
Documentation:
|
|
14
|
+
- create slack app: https://api.slack.com/quickstart#creating
|
|
15
|
+
- set channels:history scope: https://api.slack.com/quickstart#scopes
|
|
16
|
+
- installing app/get token: https://api.slack.com/quickstart#installing
|
|
17
|
+
- add app to channel/s
|
|
18
|
+
|
|
19
|
+
Attributes:
|
|
20
|
+
token (str): The authentication token for accessing the Slack API.
|
|
21
|
+
channels (list[str]): A list of Slack channel IDs, e.g. ['C01B2PZQX1V'].
|
|
22
|
+
num_days (int, optional): The number of days to retrieve messages from. Defaults to 7.
|
|
23
|
+
extension (list[str], optional): A list of file extensions to filter messages by, e.g. [".xml"]. Defaults to None.
|
|
24
|
+
"""
|
|
25
|
+
token: str
|
|
26
|
+
channels: list[str]
|
|
27
|
+
num_days: int = Field(default=7,validation_alias=AliasChoices("numDays","num_days"))
|
|
28
|
+
extension: list[str] = Field(default=None)
|
|
29
|
+
class Slack(IntegrationStrategy):
|
|
30
|
+
def __init__(self, knowledgebase_path: str, data: dict[str, Union[str,int,list]]):
|
|
31
|
+
super().__init__(knowledgebase_path, data)
|
|
32
|
+
self.__data = SlackParams.model_validate(self.data)
|
|
33
|
+
self.__unstructured_ingest = UnstructuredIngest(self.working_directory)
|
|
34
|
+
def working_subdirectory(self) -> str:
|
|
35
|
+
return 'slack'
|
|
36
|
+
def run(self) -> None:
|
|
37
|
+
indexer_config = SlackIndexerConfig(
|
|
38
|
+
channels=self.__data.channels,
|
|
39
|
+
start_date=datetime.now() - timedelta(days=self.__data.num_days),
|
|
40
|
+
end_date=datetime.now()
|
|
41
|
+
)
|
|
42
|
+
downloader_config = SlackDownloaderConfig(
|
|
43
|
+
download_dir=self.working_directory
|
|
44
|
+
)
|
|
45
|
+
connection_config = SlackConnectionConfig(
|
|
46
|
+
access_config=SlackAccessConfig(token=self.__data.token)
|
|
47
|
+
)
|
|
48
|
+
self.__unstructured_ingest.pipeline(
|
|
49
|
+
indexer_config,
|
|
50
|
+
downloader_config,
|
|
51
|
+
connection_config,
|
|
52
|
+
extension=self.__data.extension).run()
|
|
53
|
+
async def load(self) -> list[Document]:
|
|
54
|
+
await asyncio.to_thread(self.run)
|
|
55
|
+
await asyncio.sleep(1)
|
|
56
|
+
return await Loader(self.working_directory).load()
|
|
57
|
+
|
{ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/vector_store/loader/base.py
RENAMED
|
@@ -12,7 +12,10 @@ from ws_bom_robot_app.llm.vector_store.loader.json_loader import JsonLoader
|
|
|
12
12
|
|
|
13
13
|
class LoaderConfig(BaseModel):
|
|
14
14
|
loader: type[BaseLoader]
|
|
15
|
-
kwargs: Optional[dict[str, Any]] = {
|
|
15
|
+
kwargs: Optional[dict[str, Any]] = {
|
|
16
|
+
'chunking_strategy': 'basic',
|
|
17
|
+
'max_characters': 10_000
|
|
18
|
+
}
|
|
16
19
|
#post_processors: Optional[list[Callable[[str], str]]] = None
|
|
17
20
|
|
|
18
21
|
class Loader():
|
|
@@ -45,7 +48,7 @@ class Loader():
|
|
|
45
48
|
'strategy':'ocr_only', #https://docs.unstructured.io/open-source/core-functionality/partitioning auto,ocr_only,hi_res
|
|
46
49
|
'split_pdf_page': False,
|
|
47
50
|
'chunking_strategy': 'basic',
|
|
48
|
-
'max_characters':
|
|
51
|
+
'max_characters': 10_000,
|
|
49
52
|
'include_page_breaks': True,
|
|
50
53
|
'include_orig_elements': False}),
|
|
51
54
|
'.png': LoaderConfig(loader=UnstructuredLoader,kwargs={"strategy":"ocr_only"}),
|
|
@@ -23,9 +23,16 @@ opencv-python-headless==4.10.0.84 #docker specs
|
|
|
23
23
|
unstructured[all-docs]==0.16.11
|
|
24
24
|
langchain_unstructured==0.1.5
|
|
25
25
|
unstructured-ingest==0.3.8
|
|
26
|
+
unstructured-ingest[azure]
|
|
26
27
|
unstructured-ingest[confluence]
|
|
28
|
+
unstructured-ingest[dropbox]
|
|
29
|
+
unstructured-ingest[gcs]
|
|
27
30
|
unstructured-ingest[github]
|
|
31
|
+
unstructured-ingest[google_drive]
|
|
28
32
|
unstructured-ingest[jira]
|
|
33
|
+
unstructured-ingest[s3]
|
|
34
|
+
unstructured-ingest[sftp]
|
|
35
|
+
unstructured-ingest[slack]
|
|
29
36
|
html5lib==1.1 #beautifulsoup4 parser
|
|
30
37
|
|
|
31
38
|
#integrations
|
|
@@ -170,7 +170,7 @@ class TaskManagerStrategy(ABC):
|
|
|
170
170
|
pass
|
|
171
171
|
|
|
172
172
|
def task_cleanup_rule(self, task: TaskEntry) -> bool:
|
|
173
|
-
return task.status.
|
|
173
|
+
return task.status.metadata.start_at and datetime.fromisoformat(task.status.metadata.start_at) < datetime.now() - timedelta(days=config.robot_task_retention_days)
|
|
174
174
|
|
|
175
175
|
def task_done_callback(self, task_entry: TaskEntry, headers: TaskHeader | None = None) -> Callable:
|
|
176
176
|
def callback(task: asyncio.Task):
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: ws_bom_robot_app
|
|
3
|
-
Version: 0.0.
|
|
3
|
+
Version: 0.0.24
|
|
4
4
|
Summary: A FastAPI application serving ws bom/robot/llm platform ai.
|
|
5
5
|
Home-page: https://github.com/websolutespa/bom
|
|
6
6
|
Author: Websolute Spa
|
|
@@ -26,9 +26,16 @@ Requires-Dist: opencv-python-headless==4.10.0.84
|
|
|
26
26
|
Requires-Dist: unstructured[all-docs]==0.16.11
|
|
27
27
|
Requires-Dist: langchain_unstructured==0.1.5
|
|
28
28
|
Requires-Dist: unstructured-ingest==0.3.8
|
|
29
|
+
Requires-Dist: unstructured-ingest[azure]
|
|
29
30
|
Requires-Dist: unstructured-ingest[confluence]
|
|
31
|
+
Requires-Dist: unstructured-ingest[dropbox]
|
|
32
|
+
Requires-Dist: unstructured-ingest[gcs]
|
|
30
33
|
Requires-Dist: unstructured-ingest[github]
|
|
34
|
+
Requires-Dist: unstructured-ingest[google_drive]
|
|
31
35
|
Requires-Dist: unstructured-ingest[jira]
|
|
36
|
+
Requires-Dist: unstructured-ingest[s3]
|
|
37
|
+
Requires-Dist: unstructured-ingest[sftp]
|
|
38
|
+
Requires-Dist: unstructured-ingest[slack]
|
|
32
39
|
Requires-Dist: html5lib==1.1
|
|
33
40
|
Requires-Dist: markdownify==0.14.1
|
|
34
41
|
Requires-Dist: nebuly==0.3.33
|
|
@@ -42,12 +42,20 @@ ws_bom_robot_app/llm/utils/webhooks.py
|
|
|
42
42
|
ws_bom_robot_app/llm/vector_store/__init__.py
|
|
43
43
|
ws_bom_robot_app/llm/vector_store/generator.py
|
|
44
44
|
ws_bom_robot_app/llm/vector_store/integration/__init__.py
|
|
45
|
+
ws_bom_robot_app/llm/vector_store/integration/azure.py
|
|
45
46
|
ws_bom_robot_app/llm/vector_store/integration/base.py
|
|
46
47
|
ws_bom_robot_app/llm/vector_store/integration/confluence.py
|
|
48
|
+
ws_bom_robot_app/llm/vector_store/integration/dropbox.py
|
|
49
|
+
ws_bom_robot_app/llm/vector_store/integration/gcs.py
|
|
47
50
|
ws_bom_robot_app/llm/vector_store/integration/github.py
|
|
51
|
+
ws_bom_robot_app/llm/vector_store/integration/googledrive.py
|
|
48
52
|
ws_bom_robot_app/llm/vector_store/integration/jira.py
|
|
49
53
|
ws_bom_robot_app/llm/vector_store/integration/manager.py
|
|
54
|
+
ws_bom_robot_app/llm/vector_store/integration/s3.py
|
|
55
|
+
ws_bom_robot_app/llm/vector_store/integration/sftp.py
|
|
56
|
+
ws_bom_robot_app/llm/vector_store/integration/sharepoint.py
|
|
50
57
|
ws_bom_robot_app/llm/vector_store/integration/sitemap.py
|
|
58
|
+
ws_bom_robot_app/llm/vector_store/integration/slack.py
|
|
51
59
|
ws_bom_robot_app/llm/vector_store/loader/__init__.py
|
|
52
60
|
ws_bom_robot_app/llm/vector_store/loader/base.py
|
|
53
61
|
ws_bom_robot_app/llm/vector_store/loader/json_loader.py
|
|
@@ -14,9 +14,16 @@ opencv-python-headless==4.10.0.84
|
|
|
14
14
|
unstructured[all-docs]==0.16.11
|
|
15
15
|
langchain_unstructured==0.1.5
|
|
16
16
|
unstructured-ingest==0.3.8
|
|
17
|
+
unstructured-ingest[azure]
|
|
17
18
|
unstructured-ingest[confluence]
|
|
19
|
+
unstructured-ingest[dropbox]
|
|
20
|
+
unstructured-ingest[gcs]
|
|
18
21
|
unstructured-ingest[github]
|
|
22
|
+
unstructured-ingest[google_drive]
|
|
19
23
|
unstructured-ingest[jira]
|
|
24
|
+
unstructured-ingest[s3]
|
|
25
|
+
unstructured-ingest[sftp]
|
|
26
|
+
unstructured-ingest[slack]
|
|
20
27
|
html5lib==1.1
|
|
21
28
|
markdownify==0.14.1
|
|
22
29
|
nebuly==0.3.33
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/agent_description.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/tools/models/__init__.py
RENAMED
|
File without changes
|
{ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/tools/models/main.py
RENAMED
|
File without changes
|
|
File without changes
|
{ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/utils/agent_utils.py
RENAMED
|
File without changes
|
|
File without changes
|
{ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/utils/faiss_helper.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/vector_store/__init__.py
RENAMED
|
File without changes
|
{ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/vector_store/generator.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ws_bom_robot_app-0.0.22 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|