ws-bom-robot-app 0.0.23__py3-none-any.whl → 0.0.25__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ws_bom_robot_app/llm/agent_handler.py +14 -15
- ws_bom_robot_app/llm/api.py +0 -8
- ws_bom_robot_app/llm/main.py +17 -20
- ws_bom_robot_app/llm/tools/tool_builder.py +3 -2
- ws_bom_robot_app/llm/tools/tool_manager.py +5 -1
- ws_bom_robot_app/llm/tools/utils.py +4 -5
- ws_bom_robot_app/llm/utils/print.py +7 -7
- ws_bom_robot_app/llm/vector_store/generator.py +13 -5
- ws_bom_robot_app/llm/vector_store/integration/dropbox.py +1 -1
- ws_bom_robot_app/llm/vector_store/integration/manager.py +2 -0
- ws_bom_robot_app/llm/vector_store/integration/sharepoint.py +106 -0
- ws_bom_robot_app/llm/vector_store/loader/base.py +33 -26
- ws_bom_robot_app/llm/vector_store/loader/docling.py +35 -0
- {ws_bom_robot_app-0.0.23.dist-info → ws_bom_robot_app-0.0.25.dist-info}/METADATA +18 -8
- {ws_bom_robot_app-0.0.23.dist-info → ws_bom_robot_app-0.0.25.dist-info}/RECORD +17 -15
- {ws_bom_robot_app-0.0.23.dist-info → ws_bom_robot_app-0.0.25.dist-info}/WHEEL +0 -0
- {ws_bom_robot_app-0.0.23.dist-info → ws_bom_robot_app-0.0.25.dist-info}/top_level.txt +0 -0

ws_bom_robot_app/llm/agent_handler.py
CHANGED

@@ -1,3 +1,4 @@
+from asyncio import Queue
 from langchain_core.agents import AgentFinish
 from langchain_core.outputs import ChatGenerationChunk, GenerationChunk
 from langchain.callbacks.base import AsyncCallbackHandler
@@ -16,13 +17,13 @@ import json

 class AgentHandler(AsyncCallbackHandler):

-    def __init__(self, threadId) -> None:
+    def __init__(self, queue: Queue, threadId: str = None) -> None:
         super().__init__()
         self._threadId = threadId
-        self.json_cards = None
         self.json_block = ""
         self.is_json_block = False
         self.backtick_count = 0 # Conteggio dei backticks per il controllo accurato
+        self.queue = queue

     async def on_llm_start(
         self,
@@ -39,7 +40,7 @@ class AgentHandler(AsyncCallbackHandler):
             "type": "info",
             "threadId": self._threadId,
         }
-
+        await self.queue.put(printString(firstChunk))

     """async def on_chat_model_start(self, serialized: Dict[str, Any], messages: List[List[BaseMessage]], *, run_id: UUID = None, parent_run_id = None, tags = None, metadata = None, **kwargs: Any) -> Any:
         pass"""
@@ -75,7 +76,7 @@ class AgentHandler(AsyncCallbackHandler):
         elif self.is_json_block:
             self.json_block += token
         else:
-            printString(token)
+            await self.queue.put(printString(token))
         pass

     async def on_agent_finish(
@@ -92,12 +93,9 @@ class AgentHandler(AsyncCallbackHandler):
                 AIMessage(content=finish.return_values["output"]),
             ]
         )
-        if self.json_cards:
-            for card in self.json_cards:
-                printJson(card)
-            self.json_cards = None
         finalChunk = {"type": "end"}
-        printJson(finalChunk)
+        await self.queue.put(printJson(finalChunk))
+        await self.queue.put(None)

     async def process_json_block(self, json_block: str):
         """Processa il blocco JSON completo."""
@@ -108,15 +106,16 @@ class AgentHandler(AsyncCallbackHandler):
         try:
             # Prova a fare il parsing del JSON
             parsed_json = json.loads(json_block_clean)
-            printJson(parsed_json)
+            await self.queue.put(printJson(parsed_json))
         except json.JSONDecodeError as e:
             # Se il JSON è malformato, logga l'errore
             raise e

 class RawAgentHandler(AsyncCallbackHandler):

-    def __init__(self) -> None:
+    def __init__(self,queue: Queue) -> None:
         super().__init__()
+        self.queue = queue

     async def on_llm_start(
         self,
@@ -147,10 +146,9 @@ class RawAgentHandler(AsyncCallbackHandler):
         tags: Optional[List[str]] = None,
         **kwargs: Any,
     ) -> None:
-        """
-        if token
-
-        pass
+        """Handles new tokens during streaming."""
+        if token: # Only process non-empty tokens
+            await self.queue.put(token)

     async def on_agent_finish(
         self,
@@ -166,3 +164,4 @@ class RawAgentHandler(AsyncCallbackHandler):
                 AIMessage(content=finish.return_values["output"]),
             ]
         )
+        await self.queue.put(None)

ws_bom_robot_app/llm/api.py
CHANGED

@@ -25,18 +25,10 @@ async def _invoke(rq: InvokeRequest):
 async def _stream(rq: StreamRequest) -> StreamingResponse:
     return StreamingResponse(stream(rq), media_type="application/json")

-@router.post("/stream/none")
-async def _stream_none(rq: StreamRequest) -> None:
-    await stream_none(rq)
-
 @router.post("/stream/raw")
 async def _stream_raw(rq: StreamRequest) -> StreamingResponse:
     return StreamingResponse(stream(rq, formatted=False), media_type="application/json")

-@router.post("/stream/raw/none")
-async def _stream_raw_none(rq: StreamRequest) -> None:
-    await stream_none(rq, formatted=False)
-
 @router.post("/kb")
 async def _kb(rq: KbRequest) -> VectorDbResponse:
     return await kb(rq)

ws_bom_robot_app/llm/main.py
CHANGED

@@ -10,6 +10,7 @@ from nebuly.providers.langchain import LangChainTrackingHandler
 from langchain_core.callbacks.base import AsyncCallbackHandler
 import warnings, asyncio, os, io, sys, json
 from typing import List
+from asyncio import Queue

 async def invoke(rq: InvokeRequest) -> str:
     await rq.initialize()
@@ -22,12 +23,13 @@ async def invoke(rq: InvokeRequest) -> str:
     result: AIMessage = await processor.run_agent(_msg)
     return {"result": result.content}

-async def __stream(rq: StreamRequest,formatted: bool = True) -> None:
+async def __stream(rq: StreamRequest,queue: Queue,formatted: bool = True) -> None:
     await rq.initialize()
+    #os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
     if formatted:
-        agent_handler = AgentHandler(rq.thread_id)
+        agent_handler = AgentHandler(queue,rq.thread_id)
     else:
-        agent_handler = RawAgentHandler()
+        agent_handler = RawAgentHandler(queue)
     os.environ["AGENT_HANDLER_FORMATTED"] = str(formatted)
     callbacks: List[AsyncCallbackHandler] = [agent_handler]
     settings.init()
@@ -53,7 +55,7 @@ async def __stream(rq: StreamRequest,formatted: bool = True) -> None:
     processor = AgentLcel(
         openai_config={"api_key": rq.secrets["openAIApiKey"], "openai_model": rq.model, "temperature": rq.temperature},
         sys_message=rq.system_message,
-        tools=get_structured_tools(tools=rq.app_tools, api_key=rq.secrets["openAIApiKey"], callbacks=[callbacks[0]]),
+        tools=get_structured_tools(tools=rq.app_tools, api_key=rq.secrets["openAIApiKey"], callbacks=[callbacks[0]], queue=queue),
         rules=rq.rules
     )

@@ -71,25 +73,20 @@ async def __stream(rq: StreamRequest,formatted: bool = True) -> None:
         {"callbacks": callbacks},
     )

+    # Signal the end of streaming
+    await queue.put(None)
+
 async def stream(rq: StreamRequest,formatted:bool = True) -> AsyncGenerator[str, None]:
-
-
-    sys.stdout = sys_stdout
+    queue = Queue()
+    task = asyncio.create_task(__stream(rq, queue, formatted))
     try:
-
-
-
-
-
-        yield output
-        sys_stdout.truncate(0)
-        sys_stdout.seek(0)
-        # capture any remaining output after the task completes
-        output = sys_stdout.getvalue()
-        if output:
-            yield output
+        while True:
+            token = await queue.get()
+            if token is None: # None indicates the end of streaming
+                break
+            yield token
     finally:
-
+        await task

 async def stream_none(rq: StreamRequest, formatted: bool = True) -> None:
     await __stream(rq, formatted)
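Taken together, the agent_handler.py and main.py changes above replace the old stdout-capture streaming with an asyncio.Queue shared by the callback handlers (producers) and the stream() generator (consumer), with None used as the end-of-stream sentinel. Below is a minimal, self-contained sketch of that pattern; the names produce_tokens and consume_tokens are illustrative only and not part of the package.

```python
import asyncio
from asyncio import Queue
from typing import AsyncGenerator

async def produce_tokens(queue: Queue) -> None:
    # Stands in for the callback handlers: push each chunk, then the sentinel.
    for token in ("Hello", " ", "world"):
        await queue.put(token)
    await queue.put(None)  # None signals the end of streaming

async def consume_tokens() -> AsyncGenerator[str, None]:
    # Stands in for stream(): drain the queue until the sentinel arrives.
    queue: Queue = Queue()
    task = asyncio.create_task(produce_tokens(queue))
    try:
        while True:
            token = await queue.get()
            if token is None:
                break
            yield token
    finally:
        await task  # make sure the producer finished cleanly

async def main() -> None:
    async for chunk in consume_tokens():
        print(chunk, end="")
    print()

if __name__ == "__main__":
    asyncio.run(main())
```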

ws_bom_robot_app/llm/tools/tool_builder.py
CHANGED

@@ -1,12 +1,13 @@
+from asyncio import Queue
 from langchain.tools import StructuredTool
 from ws_bom_robot_app.llm.models.api import LlmAppTool
 from ws_bom_robot_app.llm.tools.tool_manager import ToolManager

-def get_structured_tools(tools: list[LlmAppTool], api_key:str, callbacks:list) -> list[StructuredTool]:
+def get_structured_tools(tools: list[LlmAppTool], api_key:str, callbacks:list, queue: Queue) -> list[StructuredTool]:
     _structured_tools :list[StructuredTool] = []
     for tool in [tool for tool in tools if tool.is_active]:
         if _tool_config := ToolManager._list.get(tool.function_name):
-            _tool_instance = ToolManager(tool, api_key, callbacks)
+            _tool_instance = ToolManager(tool, api_key, callbacks, queue)
             _structured_tool = StructuredTool.from_function(
                 coroutine=_tool_instance.get_coroutine(),
                 name=tool.function_id,

ws_bom_robot_app/llm/tools/tool_manager.py
CHANGED

@@ -1,3 +1,4 @@
+from asyncio import Queue
 from typing import Optional, Type, Callable
 from ws_bom_robot_app.llm.models.api import LlmAppTool
 from ws_bom_robot_app.llm.utils.faiss_helper import FaissHelper
@@ -33,10 +34,12 @@ class ToolManager:
         app_tool: LlmAppTool,
         api_key: str,
         callbacks: list,
+        queue: Optional[Queue] = None
     ):
         self.app_tool = app_tool
         self.api_key = api_key
         self.callbacks = callbacks
+        self.queue = queue


     #region functions
@@ -64,7 +67,8 @@ class ToolManager:
         else:
             search_type = "mixed"
             search_kwargs = {"k": search_settings.search_k if search_settings.search_k else 4}
-
+        if self.queue:
+            await self.queue.put(getRandomWaitingMessage(self.app_tool.waiting_message, traduction=False))
         return await FaissHelper.invoke(self.app_tool.vector_db, self.api_key, query, search_type, search_kwargs)
     return []
     #raise ValueError(f"Invalid configuration for {self.settings.name} tool of type {self.settings.type}. Must be a function or vector db not found.")

ws_bom_robot_app/llm/tools/utils.py
CHANGED

@@ -3,8 +3,8 @@ from langchain_openai import ChatOpenAI
 from langchain_core.prompts import PromptTemplate
 from ws_bom_robot_app.llm.utils.print import printString

-def __print_output(data: str) ->
-    printString(data) if os.environ.get("AGENT_HANDLER_FORMATTED") == str(True) else
+def __print_output(data: str) -> str:
+    return printString(data) if os.environ.get("AGENT_HANDLER_FORMATTED") == str(True) else f"{data} "

 def getRandomWaitingMessage(waiting_messages: str, traduction: bool = True) -> str:
     if not waiting_messages: return ""
@@ -12,13 +12,12 @@ def getRandomWaitingMessage(waiting_messages: str, traduction: bool = True) -> s
     if not messages: return ""
     chosen_message = random.choice(messages) + "\n"
     if not traduction:
-        __print_output(chosen_message)
+        return __print_output(chosen_message)
     return chosen_message

 async def translate_text(api_key, language, text: str, callbacks: list) -> str:
     if language == "it":
-        __print_output(text)
-        return
+        return __print_output(text)
     llm = ChatOpenAI(api_key=api_key, model="gpt-3.5-turbo-0125", streaming=True)
     sys_message = """Il tuo compito è di tradurre il testo_da_tradure nella seguente lingua: \n\n lingua: {language}\n\n testo_da_tradure: {testo_da_tradure} \n\nTraduci il testo_da_tradure nella lingua {language} senza aggiungere altro:"""
     prompt = PromptTemplate.from_template(sys_message)

ws_bom_robot_app/llm/utils/print.py
CHANGED

@@ -14,16 +14,16 @@ class HiddenPrints:
         sys.stdout = self._original_stdout
         sys.stderr = self._original_stderr

-def printJson(data) ->
-
+def printJson(data) -> str:
+    return f"{json.dumps(data, indent=2, sort_keys=True)},"

 def printSingleJson(data) -> str:
-
+    return f"{json.dumps(data, indent=2, sort_keys=True)}"

-def printString(data: str) ->
+def printString(data: str) -> str:
     if data != "":
-        printJson(data)
+        return printJson(data)

-def printSingleString(data: str) ->
+def printSingleString(data: str) -> str:
     if data != "":
-        printSingleJson(data)
+        return printSingleJson(data)

ws_bom_robot_app/llm/vector_store/generator.py
CHANGED

@@ -1,4 +1,4 @@
-import os, gc, shutil, traceback
+import os, gc, shutil, logging, traceback
 import asyncio, aiofiles, aiofiles.os
 from fastapi import HTTPException
 from fastapi.responses import StreamingResponse
@@ -67,7 +67,9 @@ async def kb(rq: KbRequest) -> VectorDbResponse:
                 documents.extend(await loaders.load())
             except Exception as e:
                 tb = traceback.format_exc()
-
+                _error = f"File loader failure: {e} | {tb}"
+                logging.warning(_error)
+                return VectorDbResponse(success = False, error = _error)
     except Exception as e:
         await _cleanup_directory(working_path)
         return VectorDbResponse(success = False, error = f"Failed to download file {e}")
@@ -78,7 +80,9 @@ async def kb(rq: KbRequest) -> VectorDbResponse:
     except Exception as e:
         await _cleanup_directory(working_path)
         tb = traceback.format_exc()
-
+        _error = f"Endpoint failure: {e} | {tb}"
+        logging.warning(_error)
+        return VectorDbResponse(success = False, error = _error)

     if rq.integrations:
         tasks = []
@@ -95,7 +99,9 @@ async def kb(rq: KbRequest) -> VectorDbResponse:
         except Exception as e:
             await _cleanup_directory(working_path)
             tb = traceback.format_exc()
-
+            _error = f"Integration failure: {e} | {tb}"
+            logging.warning(_error)
+            return VectorDbResponse(success=False, error=_error)

     #cleanup
     await _cleanup_directory(working_path)
@@ -116,7 +122,9 @@ async def kb(rq: KbRequest) -> VectorDbResponse:
         del documents
         gc.collect()
     else:
-
+        _error = "No documents found in the knowledgebase folder"
+        logging.warning(_error)
+        return VectorDbResponse(success = False, error = _error)

 async def kb_stream_file(filename: str):
     file_path = os.path.join(config.robot_data_folder, config.robot_data_db_folder, config.robot_data_db_folder_out, filename)

ws_bom_robot_app/llm/vector_store/integration/dropbox.py
CHANGED

@@ -12,7 +12,7 @@ class DropboxParams(BaseModel):
    Attributes:
        remote_url (str): The URL of the remote Dropbox location, e.g. 'dropbox://demo-directory' or 'dropbox://demo-directory/sub-directory'.
        token (str): The authentication token for accessing Dropbox.
-           create app: https://www.dropbox.com/developers, with file.content.read permission, and generate token.
+           create app: https://www.dropbox.com/developers, with file.content.read permission, and generate token, or use existing app: https://www.dropbox.com/account/connected_apps / https://www.dropbox.com/developers/apps?_tk=pilot_lp&_ad=topbar4&_camp=myapps
        recursive (bool, optional): A flag indicating whether to search directories recursively. Defaults to False.
        extension (list[str], optional): A list of file extensions to filter by. Defaults to None, e.g. ['.pdf', '.docx'].
    """

ws_bom_robot_app/llm/vector_store/integration/manager.py
CHANGED

@@ -9,6 +9,7 @@ from ws_bom_robot_app.llm.vector_store.integration.googledrive import GoogleDriv
 from ws_bom_robot_app.llm.vector_store.integration.jira import Jira
 from ws_bom_robot_app.llm.vector_store.integration.s3 import S3
 from ws_bom_robot_app.llm.vector_store.integration.sftp import Sftp
+from ws_bom_robot_app.llm.vector_store.integration.sharepoint import Sharepoint
 from ws_bom_robot_app.llm.vector_store.integration.sitemap import Sitemap
 from ws_bom_robot_app.llm.vector_store.integration.slack import Slack

@@ -23,6 +24,7 @@ class IntegrationManager:
    "llmkbjira": Jira,
    "llmkbs3": S3,
    "llmkbsftp": Sftp,
+   "llmkbsharepoint": Sharepoint,
    "llmkbsitemap": Sitemap,
    "llmkbslack": Slack,


ws_bom_robot_app/llm/vector_store/integration/sharepoint.py
ADDED

@@ -0,0 +1,106 @@
+import asyncio, logging, traceback
+from dataclasses import dataclass
+from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy, UnstructuredIngest
+from unstructured_ingest.v2.processes.connectors.sharepoint import SharepointIndexerConfig, SharepointIndexer, SharepointDownloaderConfig, SharepointConnectionConfig, SharepointAccessConfig
+from langchain_core.documents import Document
+from ws_bom_robot_app.llm.vector_store.loader.base import Loader
+from typing import Union, Optional
+from pydantic import BaseModel, Field, AliasChoices
+
+class SharepointParams(BaseModel):
+    """
+    SharepointParams is a Pydantic model that defines the parameters required to connect to a SharePoint site.
+
+    Attributes:
+        client_id (str): The client ID for SharePoint authentication.
+        client_secret (str): The client secret for SharePoint authentication.
+        site_url (str): The URL of the SharePoint site. i.e. site collection level: https://<tenant>.sharepoint.com/sites/<site-collection-name>, or root site: https://<tenant>.sharepoint.com
+        site_path (str, optional): TThe path in the SharePoint site from which to start parsing files, for example "Shared Documents". Defaults to None.
+        recursive (bool, optional): Whether to recursively access subdirectories. Defaults to False.
+        omit_files (bool, optional): Whether to omit files from the results. Defaults to False.
+        omit_pages (bool, optional): Whether to omit pages from the results. Defaults to False.
+        omit_lists (bool, optional): Whether to omit lists from the results. Defaults to False.
+        extension (list[str], optional): A list of file extensions to include, i.e. [".pdf"] Defaults to None.
+    """
+    client_id : str = Field(validation_alias=AliasChoices("clientId","client_id"))
+    client_secret : str = Field(validation_alias=AliasChoices("clientSecret","client_secret"))
+    site_url: str = Field(validation_alias=AliasChoices("siteUrl","site_url"))
+    site_path: str = Field(default=None,validation_alias=AliasChoices("sitePath","site_path"))
+    recursive: bool = Field(default=False)
+    omit_files: bool = Field(default=False, validation_alias=AliasChoices("omitFiles","omit_files")),
+    omit_pages: bool = Field(default=False, validation_alias=AliasChoices("omitPages","omit_pages")),
+    omit_lists: bool = Field(default=False, validation_alias=AliasChoices("omitLists","omit_lists")),
+    extension: list[str] = Field(default=None)
+class Sharepoint(IntegrationStrategy):
+    def __init__(self, knowledgebase_path: str, data: dict[str, Union[str,int,list]]):
+        super().__init__(knowledgebase_path, data)
+        self.__data = SharepointParams.model_validate(self.data)
+        self.__unstructured_ingest = UnstructuredIngest(self.working_directory)
+    def working_subdirectory(self) -> str:
+        return 'sharepoint'
+    def run(self) -> None:
+        indexer_config = SharepointIndexerConfig(
+            path=self.__data.site_path,
+            recursive=self.__data.recursive,
+            omit_files=self.__data.omit_files,
+            omit_pages=self.__data.omit_pages,
+            omit_lists=self.__data.omit_lists
+        )
+        downloader_config = SharepointDownloaderConfig(
+            download_dir=self.working_directory
+        )
+        connection_config = SharepointConnectionConfig(
+            access_config=SharepointAccessConfig(client_cred=self.__data.client_secret),
+            client_id=self.__data.client_id,
+            site=self.__data.site_url,
+            permissions_config=None
+        )
+        pipeline = self.__unstructured_ingest.pipeline(
+            indexer_config,
+            downloader_config,
+            connection_config,
+            extension=self.__data.extension)
+        current_indexer_process = pipeline.indexer_step.process
+        pipeline.indexer_step.process = CustomSharepointIndexer(**vars(current_indexer_process))
+        pipeline.run()
+    async def load(self) -> list[Document]:
+        await asyncio.to_thread(self.run)
+        await asyncio.sleep(1)
+        return await Loader(self.working_directory).load()
+
+@dataclass
+class CustomSharepointIndexer(SharepointIndexer):
+    def __init__(self, **kwargs):
+        # Initialize all attributes from the base indexer
+        for key, value in kwargs.items():
+            setattr(self, key, value)
+    def list_files(self, folder, recursive):
+        try:
+            _files = super().list_files(folder, recursive)
+            return _files
+        except Exception as e:
+            tb = traceback.format_exc()
+            logging.error(f"Error listing sharepoint files: {e} \n {tb}")
+            return []
+    def file_to_file_data(self, client, file):
+        try:
+            return super().file_to_file_data(client, file)
+        except Exception as e:
+            tb = traceback.format_exc()
+            logging.error(f"Error converting sharepoint file {file} to data: {e} \n {tb}")
+            return None
+    def list_pages(self, client):
+        try:
+            _pages = super().list_pages(client)
+            _allowed_content_type = None
+            for page in _pages:
+                # determine the allowed content type from the first page (Home.aspx)
+                if not _allowed_content_type:
+                    _allowed_content_type = page.content_type_id
+                if not page.content_type_id == _allowed_content_type:
+                    _pages.remove_child(page)
+            return _pages
+        except Exception as e:
+            tb = traceback.format_exc()
+            logging.error(f"Error listing sharepoint pages: {e} \n {tb}")
+            return []
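The new integration is registered under the "llmkbsharepoint" key (see the manager.py hunk above) and validates its data payload with SharepointParams. Below is a minimal sketch of such a payload, assuming the package and its SharePoint extras are installed; the tenant, site, and credential values are placeholders, not real ones.

```python
from ws_bom_robot_app.llm.vector_store.integration.sharepoint import SharepointParams

# camelCase keys are accepted thanks to the AliasChoices validation aliases
data = {
    "clientId": "<azure-ad-app-client-id>",          # placeholder
    "clientSecret": "<azure-ad-app-client-secret>",  # placeholder
    "siteUrl": "https://contoso.sharepoint.com/sites/knowledge-base",
    "sitePath": "Shared Documents",
    "recursive": True,
    "extension": [".pdf", ".docx"],
}

params = SharepointParams.model_validate(data)
print(params.site_url, params.recursive)
```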

ws_bom_robot_app/llm/vector_store/loader/base.py
CHANGED

@@ -1,14 +1,14 @@
-
 import asyncio, gc, logging, os, traceback
 from typing import Any, Optional
 from langchain_community.document_loaders import DirectoryLoader
 from langchain_community.document_loaders.base import BaseLoader
 from langchain_community.document_loaders.merge import MergedDataLoader
 from langchain_core.documents import Document
-from langchain_unstructured import UnstructuredLoader
 from pydantic import BaseModel
 from ws_bom_robot_app.config import config
 from ws_bom_robot_app.llm.vector_store.loader.json_loader import JsonLoader
+from ws_bom_robot_app.llm.vector_store.loader.docling import DoclingLoader
+from langchain_community.document_loaders import CSVLoader, UnstructuredPowerPointLoader, UnstructuredEmailLoader, UnstructuredXMLLoader, TextLoader, UnstructuredHTMLLoader

 class LoaderConfig(BaseModel):
     loader: type[BaseLoader]
@@ -22,39 +22,46 @@ class Loader():

     _list: dict[str, LoaderConfig | None] = {
         '.json': LoaderConfig(loader=JsonLoader),
-        '.csv': LoaderConfig(loader=
-        '.xls':
-        '.xlsx': LoaderConfig(loader=
-        '.eml': LoaderConfig(loader=
-        '.msg': LoaderConfig(loader=
+        '.csv': LoaderConfig(loader=CSVLoader),
+        '.xls': None,
+        '.xlsx': LoaderConfig(loader=DoclingLoader),
+        '.eml': LoaderConfig(loader=UnstructuredEmailLoader,kwargs={"strategy":"auto", "process_attachments": False}),
+        '.msg': LoaderConfig(loader=UnstructuredEmailLoader,kwargs={"strategy":"auto", "process_attachments": False}),
         '.epub': None,
-        '.md': LoaderConfig(loader=
+        '.md': LoaderConfig(loader=TextLoader),
         '.org': None,
         '.odt': None,
         '.ppt': None,
-        '.pptx': LoaderConfig(loader=
-        '.txt': LoaderConfig(loader=
+        '.pptx': LoaderConfig(loader=UnstructuredPowerPointLoader,kwargs={"strategy":"auto"}), #docling issue with WMF https://github.com/DS4SD/docling/issues/594
+        '.txt': LoaderConfig(loader=TextLoader),
         '.rst': None,
         '.rtf': None,
         '.tsv': None,
         '.text': None,
         '.log': None,
-        '.htm': LoaderConfig(loader=
-        '.html': LoaderConfig(loader=
-
-
-
-
-
-
-
-        '
-        '.
-        '.
+        '.htm': LoaderConfig(loader=UnstructuredHTMLLoader,kwargs={"strategy":"auto"}),
+        '.html': LoaderConfig(loader=UnstructuredHTMLLoader,kwargs={"strategy":"auto"}),
+        ".pdf": LoaderConfig(loader=DoclingLoader),
+        #'.pdf': LoaderConfig(loader=UnstructuredLoader,kwargs={
+        # 'strategy':'ocr_only', #https://docs.unstructured.io/open-source/core-functionality/partitioning auto,ocr_only,hi_res
+        # 'split_pdf_page': False,
+        # 'chunking_strategy': 'basic',
+        # 'max_characters': 10_000,
+        # 'include_page_breaks': True,
+        # 'include_orig_elements': False}),
+        #'.png': LoaderConfig(loader=UnstructuredLoader,kwargs={"strategy":"ocr_only"}),
+        #'.jpg': LoaderConfig(loader=UnstructuredLoader,kwargs={"strategy":"ocr_only"}),
+        #'.jpeg': LoaderConfig(loader=UnstructuredLoader,kwargs={"strategy":"ocr_only"}),
+        '.png': LoaderConfig(loader=DoclingLoader),
+        '.jpg': LoaderConfig(loader=DoclingLoader),
+        '.jpeg': LoaderConfig(loader=DoclingLoader),
+        '.gif': None,
+        ".emf": None,
+        ".wmf": None,
         '.tiff': None,
         '.doc': None, #see liberoffice dependency
-        '.docx': LoaderConfig(loader=
-        '.xml': LoaderConfig(loader=
+        '.docx': LoaderConfig(loader=DoclingLoader),
+        '.xml': LoaderConfig(loader=UnstructuredXMLLoader,kwargs={"strategy":"auto"}),
         '.js': None,
         '.py': None,
         '.c': None,
@@ -78,7 +85,7 @@ class Loader():
         loader_configs = {}
         for ext, loader_config in Loader._list.items():
             if loader_config:
-                if all([self._runtime_options.loader_strategy != "",loader_config.kwargs
+                if all([self._runtime_options.loader_strategy != "",loader_config.kwargs and "strategy" in loader_config.kwargs]): # type: ignore
                     loader_config.kwargs["strategy"] = self._runtime_options.loader_strategy # type: ignore
                 loader_key = (loader_config.loader, tuple(loader_config.kwargs.items())) # type: ignore
                 if loader_key not in loader_configs:
@@ -119,7 +126,7 @@ class Loader():
                 return _documents
             except Exception as e:
                 logging.warning(f"Attempt {attempt+1} load document failed: {e}")
-                await asyncio.sleep(
+                await asyncio.sleep(2)
                 if attempt == MAX_RETRIES - 1:
                     tb = traceback.format_exc()
                     logging.error(f"Failed to load documents: {e} | {tb}")

ws_bom_robot_app/llm/vector_store/loader/docling.py
ADDED

@@ -0,0 +1,35 @@
+import os, logging, traceback
+from typing import Iterator, AsyncIterator, Optional
+from langchain_core.document_loaders import BaseLoader
+from langchain_core.documents import Document
+from langchain_core.runnables import run_in_executor
+from docling.document_converter import DocumentConverter, ConversionResult, ConversionStatus
+
+class DoclingLoader(BaseLoader):
+    def __init__(self, file_path: str | list[str]) -> None:
+        self._file_paths = file_path if isinstance(file_path, list) else [file_path]
+        self._converter = DocumentConverter()
+    def load(self) -> list[Document]:
+        """Load data into Document objects."""
+        return list(self.lazy_load())
+    async def aload(self) -> list[Document]:
+        """Load data into Document objects."""
+        return [document async for document in self.alazy_load()]
+    async def alazy_load(self) -> AsyncIterator[Document]:
+        """A lazy loader for Documents."""
+        iterator = await run_in_executor(None, self.lazy_load)
+        done = object()
+        while True:
+            doc = await run_in_executor(None, next, iterator, done) # type: ignore[call-arg, arg-type]
+            if doc is done:
+                break
+            yield doc # type: ignore[misc]
+    def lazy_load(self) -> Iterator[Document]:
+        for source in self._file_paths:
+            try:
+                _result = self._converter.convert(os.path.abspath(source),raises_on_error=True)
+                doc = _result.document
+                text = doc.export_to_markdown()
+                yield Document(page_content=text, metadata={"source": source})
+            except Exception as e:
+                logging.warning(f"Failed to load document from {source}: {e} | {traceback.format_exc()}")
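A minimal usage sketch of the DoclingLoader added above; the sample file paths are placeholders. Each source is converted with docling and exported to markdown, yielding one langchain Document per file.

```python
from ws_bom_robot_app.llm.vector_store.loader.docling import DoclingLoader

loader = DoclingLoader(["./samples/report.pdf", "./samples/manual.docx"])  # placeholder paths
for doc in loader.lazy_load():
    # each Document carries the converted markdown and its source path
    print(doc.metadata["source"], len(doc.page_content))
```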

{ws_bom_robot_app-0.0.23.dist-info → ws_bom_robot_app-0.0.25.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ws_bom_robot_app
-Version: 0.0.23
+Version: 0.0.25
 Summary: A FastAPI application serving ws bom/robot/llm platform ai.
 Home-page: https://github.com/websolutespa/bom
 Author: Websolute Spa
@@ -21,21 +21,20 @@ Requires-Dist: langchain-openai==0.2.10
 Requires-Dist: langchain-community==0.3.8
 Requires-Dist: langchain-core==0.3.21
 Requires-Dist: faiss-cpu==1.9.0
-Requires-Dist:
-Requires-Dist:
-Requires-Dist: unstructured[all-docs]==0.16.11
-Requires-Dist: langchain_unstructured==0.1.5
+Requires-Dist: unstructured==0.16.11
+Requires-Dist: unstructured[image]
 Requires-Dist: unstructured-ingest==0.3.8
 Requires-Dist: unstructured-ingest[azure]
 Requires-Dist: unstructured-ingest[confluence]
 Requires-Dist: unstructured-ingest[dropbox]
 Requires-Dist: unstructured-ingest[gcs]
-Requires-Dist: unstructured-ingest[google_drive]
 Requires-Dist: unstructured-ingest[github]
+Requires-Dist: unstructured-ingest[google_drive]
 Requires-Dist: unstructured-ingest[jira]
 Requires-Dist: unstructured-ingest[s3]
-Requires-Dist: unstructured-ingest[slack]
 Requires-Dist: unstructured-ingest[sftp]
+Requires-Dist: unstructured-ingest[sharepoint]
+Requires-Dist: unstructured-ingest[slack]
 Requires-Dist: html5lib==1.1
 Requires-Dist: markdownify==0.14.1
 Requires-Dist: nebuly==0.3.33
@@ -214,11 +213,22 @@ launch debugger
 streamlit run debugger.py --server.port 6002
 ```

+dockerize base image
+
+```pwsh
+<# cpu #>
+docker build -f Dockerfile-robot-base-cpu -t ghcr.io/websolutespa/ws-bom-robot-base:cpu .
+docker push ghcr.io/websolutespa/ws-bom-robot-base:cpu
+<# gpu #>
+docker build -f Dockerfile-robot-base-gpu -t ghcr.io/websolutespa/ws-bom-robot-base:gpu .
+docker push ghcr.io/websolutespa/ws-bom-robot-base:gpu
+```
+
 dockerize app from src

 ```pwsh
 docker build -f Dockerfile-src -t ws-bom-robot-app:src .
-docker run --name ws-bom-robot-app-src -d -v "$(pwd)/ws_bom_robot_app:/app/ws_bom_robot_app" -p 6001:6001 ws-bom-robot-app:src
+docker run --name ws-bom-robot-app-src -d -v "$(pwd)/ws_bom_robot_app:/app/ws_bom_robot_app" -v "$(pwd)/.data:/app/.data" -p 6001:6001 ws-bom-robot-app:src
 ```

 ### ✈️ publish

{ws_bom_robot_app-0.0.23.dist-info → ws_bom_robot_app-0.0.25.dist-info}/RECORD
CHANGED

@@ -7,20 +7,20 @@ ws_bom_robot_app/task_manager.py,sha256=Zedzs2R3O-wNSQOqs4jorgFwPRi-ji_0TN4mGfk-
 ws_bom_robot_app/util.py,sha256=b49ItlZgh2Wzw-6K8k5Wa44eVgjQ0JmWQwJnEaQBVGw,3502
 ws_bom_robot_app/llm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ws_bom_robot_app/llm/agent_description.py,sha256=SDJYMmwfdMxEK3a_HDEQ19bfNKmwMSFf5hqU0VSCCIE,4705
-ws_bom_robot_app/llm/agent_handler.py,sha256=
+ws_bom_robot_app/llm/agent_handler.py,sha256=Qz3h1eZdA6pkurEbr8sQwl-0FdjugaO5Q9sB8f7iD9I,5808
 ws_bom_robot_app/llm/agent_lcel.py,sha256=jkSLMy6y_ZFvWT8bhBBYHY5CO-ea8oMSPMXMahFUBFc,2666
-ws_bom_robot_app/llm/api.py,sha256=
+ws_bom_robot_app/llm/api.py,sha256=vBu_TFTlBjp7e3J-WmlZbXn_TbB550x-NpQN4YsO7To,3004
 ws_bom_robot_app/llm/defaut_prompt.py,sha256=pn5a4lNLWE1NngHYjA_7tD8GasePMgsgude5fIJxsW0,756
-ws_bom_robot_app/llm/main.py,sha256=
+ws_bom_robot_app/llm/main.py,sha256=_uW3Iy9iPJbxDfpyoReu3mbYY8a9dS1V6tZU-z6BELo,3547
 ws_bom_robot_app/llm/settings.py,sha256=EkFGCppORenStH9W4e6_dYvQ-5p6xiEMpmUHBqNqG9M,117
 ws_bom_robot_app/llm/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ws_bom_robot_app/llm/models/api.py,sha256=KlVUbApyz6uuWefAN9K4B_vWDSps5hLW6hNg1Eo3TBQ,6996
 ws_bom_robot_app/llm/models/base.py,sha256=1TqxuTK3rjJEALn7lvgoen_1ba3R2brAgGx6EDTtDZo,152
 ws_bom_robot_app/llm/models/kb.py,sha256=9zqwDlVULVrWE48wo5AivzWoOtnjA57k9rsw8KNnyDk,8935
 ws_bom_robot_app/llm/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ws_bom_robot_app/llm/tools/tool_builder.py,sha256=
-ws_bom_robot_app/llm/tools/tool_manager.py,sha256=
-ws_bom_robot_app/llm/tools/utils.py,sha256=
+ws_bom_robot_app/llm/tools/tool_builder.py,sha256=z9SdwD6dJaJbLByHIGUaIbqbNm33an9agNnm5njSb6Q,901
+ws_bom_robot_app/llm/tools/tool_manager.py,sha256=DzJLQCLBb2jesOx2rR56_z3KyWqwJpvUGD16ImxOj34,4495
+ws_bom_robot_app/llm/tools/utils.py,sha256=yT8dJ2pywCJb-6VlgltVPEa4-b3XT8UYWUqW9m1cKWo,1307
 ws_bom_robot_app/llm/tools/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ws_bom_robot_app/llm/tools/models/main.py,sha256=LsOJ7vkcSzYLoE1oa3TG0Rs0pr9J5VS_e4li6aDx_fw,260
 ws_bom_robot_app/llm/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -28,28 +28,30 @@ ws_bom_robot_app/llm/utils/agent_utils.py,sha256=LEfAKQwFrwmIdJL0o54iuGrir9uLcJh
 ws_bom_robot_app/llm/utils/download.py,sha256=iAUxH_NiCpTPtGzhC4hBtxotd2HPFt2MBhttslIxqiI,3194
 ws_bom_robot_app/llm/utils/faiss_helper.py,sha256=69juxptz1gidgxVOrqNvJajRl40p5-ugHqyEBDtnSKo,5036
 ws_bom_robot_app/llm/utils/kb.py,sha256=jja45WCbNI7SGEgqDS99nErlwB5eY8Ga7BMnhdMHZ90,1279
-ws_bom_robot_app/llm/utils/print.py,sha256=
+ws_bom_robot_app/llm/utils/print.py,sha256=ZonoLPcfM6Cpw4_Ec455LiCovExOwvnIgvw1QORSCBY,799
 ws_bom_robot_app/llm/utils/webhooks.py,sha256=LAAZqyN6VhV13wu4X-X85TwdDgAV2rNvIwQFIIc0FJM,2114
 ws_bom_robot_app/llm/vector_store/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ws_bom_robot_app/llm/vector_store/generator.py,sha256=
+ws_bom_robot_app/llm/vector_store/generator.py,sha256=Xg-srcH_03lqPHkMn1EXP56GbY1CYa2zIbjvNfcQqyM,6192
 ws_bom_robot_app/llm/vector_store/integration/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ws_bom_robot_app/llm/vector_store/integration/azure.py,sha256=R37TaPQP-HJJJiaKE9rmMc9kpeXeRvdebbTY_982om0,3392
 ws_bom_robot_app/llm/vector_store/integration/base.py,sha256=IvIu8RkISuurrVKr2YPG96fsOI2kqhaEGyTGzjB-jCI,1550
 ws_bom_robot_app/llm/vector_store/integration/confluence.py,sha256=4fiRHB3J-SHZZxNGHwVkCrT-xSPbc91z4WrDE9fy6xU,2505
-ws_bom_robot_app/llm/vector_store/integration/dropbox.py,sha256=
+ws_bom_robot_app/llm/vector_store/integration/dropbox.py,sha256=yhGvHTN0TEpUfhdvvV7RX5MxBwTUyddAX95Fgqp3mCg,2629
 ws_bom_robot_app/llm/vector_store/integration/gcs.py,sha256=fFDVDUR6eNB7FVTzDSEpMHFEWMgG16GLnpSf_mqGDdE,3184
 ws_bom_robot_app/llm/vector_store/integration/github.py,sha256=18PO30AZcgTn6PHhid3MwImVAdmKBNkr0kmAPgOetGw,2663
 ws_bom_robot_app/llm/vector_store/integration/googledrive.py,sha256=R6hr8iEgrR3QMOzIj5jY6w1x8pZ1LGdh4xM_q7g_ttc,3738
 ws_bom_robot_app/llm/vector_store/integration/jira.py,sha256=o5iINIblp_yNszp54nf7fW97aqjs0A5G89N8sYrd1ds,2771
-ws_bom_robot_app/llm/vector_store/integration/manager.py,sha256=
+ws_bom_robot_app/llm/vector_store/integration/manager.py,sha256=5Fl3XML6f1wmgraigpUwIFIXh7QFPX0RI0YFgFxBAvg,1700
 ws_bom_robot_app/llm/vector_store/integration/s3.py,sha256=3kh-VmH84IW7DdSLvOk6td1VBJ9aohlVJsk5F3cYj0U,3320
 ws_bom_robot_app/llm/vector_store/integration/sftp.py,sha256=WNzjjS1EUykgFB-8e7QkecSa1r1jTJqKyGzR25uJCtM,2848
+ws_bom_robot_app/llm/vector_store/integration/sharepoint.py,sha256=zqqn-6qPrK50Phch4nZHJTgaPyPkGe7W2InGL_Ru6vE,5376
 ws_bom_robot_app/llm/vector_store/integration/sitemap.py,sha256=nPbIywp-ZwWbWStvjvYVgHqqejyYFr8eZhBc8ycTuaU,4206
 ws_bom_robot_app/llm/vector_store/integration/slack.py,sha256=FMjESXm2QetFXI6i8epze7Kbbu22fV8CVaxb71AHnJ8,2572
 ws_bom_robot_app/llm/vector_store/loader/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ws_bom_robot_app/llm/vector_store/loader/base.py,sha256=
+ws_bom_robot_app/llm/vector_store/loader/base.py,sha256=SWV7T6BcsV8nvnUAHHZ9Q2oFUEnfwM33jpJCry5vbIA,5847
+ws_bom_robot_app/llm/vector_store/loader/docling.py,sha256=aHHfMf2JsZo0o6jrRDlImY0Oi9NFhVQk8Wg5ePAPa50,1721
 ws_bom_robot_app/llm/vector_store/loader/json_loader.py,sha256=qo9ejRZyKv_k6jnGgXnu1W5uqsMMtgqK_uvPpZQ0p74,833
-ws_bom_robot_app-0.0.
-ws_bom_robot_app-0.0.
-ws_bom_robot_app-0.0.
-ws_bom_robot_app-0.0.
+ws_bom_robot_app-0.0.25.dist-info/METADATA,sha256=TObdL0LhroQrJaqOUTwLEY9gqyk_ct-yDPJzcOWps_w,7478
+ws_bom_robot_app-0.0.25.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+ws_bom_robot_app-0.0.25.dist-info/top_level.txt,sha256=Yl0akyHVbynsBX_N7wx3H3ZTkcMLjYyLJs5zBMDAKcM,17
+ws_bom_robot_app-0.0.25.dist-info/RECORD,,

{ws_bom_robot_app-0.0.23.dist-info → ws_bom_robot_app-0.0.25.dist-info}/WHEEL
File without changes

{ws_bom_robot_app-0.0.23.dist-info → ws_bom_robot_app-0.0.25.dist-info}/top_level.txt
File without changes