ws-bom-robot-app 0.0.30__py3-none-any.whl → 0.0.31__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ws_bom_robot_app/llm/agent_description.py +124 -124
- ws_bom_robot_app/llm/agent_handler.py +167 -167
- ws_bom_robot_app/llm/agent_lcel.py +64 -64
- ws_bom_robot_app/llm/defaut_prompt.py +9 -9
- ws_bom_robot_app/llm/main.py +102 -102
- ws_bom_robot_app/llm/settings.py +4 -4
- ws_bom_robot_app/llm/tools/tool_builder.py +19 -19
- ws_bom_robot_app/llm/tools/tool_manager.py +101 -101
- ws_bom_robot_app/llm/tools/utils.py +25 -25
- ws_bom_robot_app/llm/utils/agent_utils.py +16 -16
- ws_bom_robot_app/llm/utils/download.py +79 -79
- ws_bom_robot_app/llm/utils/print.py +29 -29
- ws_bom_robot_app/llm/vector_store/generator.py +137 -137
- ws_bom_robot_app/llm/vector_store/loader/base.py +2 -2
- ws_bom_robot_app/llm/vector_store/loader/json_loader.py +25 -25
- {ws_bom_robot_app-0.0.30.dist-info → ws_bom_robot_app-0.0.31.dist-info}/METADATA +2 -5
- {ws_bom_robot_app-0.0.30.dist-info → ws_bom_robot_app-0.0.31.dist-info}/RECORD +19 -19
- {ws_bom_robot_app-0.0.30.dist-info → ws_bom_robot_app-0.0.31.dist-info}/WHEEL +0 -0
- {ws_bom_robot_app-0.0.30.dist-info → ws_bom_robot_app-0.0.31.dist-info}/top_level.txt +0 -0

Of the hunks shown below, only the class Loader() hunk changes the text of any line; in every other hunk the removed and re-added lines are character-for-character identical, so each file's content is reproduced once without -/+ markers.

@@ -1,25 +1,25 @@
import random, os
from langchain_openai import ChatOpenAI
from langchain_core.prompts import PromptTemplate
from ws_bom_robot_app.llm.utils.print import printString

def __print_output(data: str) -> str:
    return printString(data) if os.environ.get("AGENT_HANDLER_FORMATTED") == str(True) else f"{data} "

def getRandomWaitingMessage(waiting_messages: str, traduction: bool = True) -> str:
    if not waiting_messages: return ""
    messages = [msg.strip() for msg in waiting_messages.split(";") if msg.strip()]
    if not messages: return ""
    chosen_message = random.choice(messages) + "\n"
    if not traduction:
        return __print_output(chosen_message)
    return chosen_message

async def translate_text(api_key, language, text: str, callbacks: list) -> str:
    if language == "it":
        return __print_output(text)
    llm = ChatOpenAI(api_key=api_key, model="gpt-3.5-turbo-0125", streaming=True)
    sys_message = """Il tuo compito è di tradurre il testo_da_tradure nella seguente lingua: \n\n lingua: {language}\n\n testo_da_tradure: {testo_da_tradure} \n\nTraduci il testo_da_tradure nella lingua {language} senza aggiungere altro:"""
    prompt = PromptTemplate.from_template(sys_message)
    chain = prompt | llm
    await chain.ainvoke({"language":language, "testo_da_tradure": text}, {"callbacks": callbacks})
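
A minimal usage sketch of the two helpers above; by line count this hunk appears to correspond to ws_bom_robot_app/llm/tools/utils.py, so the import path below is an assumption, and the waiting-message string is illustrative:

import asyncio, os
from ws_bom_robot_app.llm.tools.utils import getRandomWaitingMessage, translate_text  # assumed module path

# picks one of the ";"-separated messages at random and appends "\n";
# with the default traduction=True the raw message is returned so it can be translated later
msg = getRandomWaitingMessage("Un attimo...;Sto cercando...;Quasi fatto...")

# streams the translation through the provided callbacks (needs an OpenAI key);
# Italian ("it") input is echoed back instead of being translated
asyncio.run(translate_text(os.environ["OPENAI_API_KEY"], "en", msg, callbacks=[]))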

@@ -1,16 +1,16 @@
import os
from ws_bom_robot_app.llm.models.api import LlmRules
from ws_bom_robot_app.llm.utils.print import HiddenPrints
from ws_bom_robot_app.llm.utils.faiss_helper import FaissHelper

async def get_rules(rules: LlmRules, api_key:str, input: str) -> str:
    with HiddenPrints():
        if any([input=="",rules is None,rules and rules.vector_db == "",rules and not os.path.exists(rules.vector_db)]):
            return ""
        rules_prompt = ""
        rules_doc = await FaissHelper.invoke(rules.vector_db,api_key,input,search_type="similarity_score_threshold", search_kwargs={"score_threshold": rules.threshold}) #type: ignore
        if len(rules_doc) > 0:
            rules_prompt = "\nFollow this rules: \n RULES: \n"
            for rule_doc in rules_doc:
                rules_prompt += "- " + rule_doc.page_content + "\n"
        return rules_prompt
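
A hedged sketch of calling get_rules (this hunk appears to match ws_bom_robot_app/llm/utils/agent_utils.py). The LlmRules fields vector_db and threshold are taken from the code above, but constructing the model with those keywords, and the module path, are assumptions:

import asyncio
from ws_bom_robot_app.llm.models.api import LlmRules
from ws_bom_robot_app.llm.utils.agent_utils import get_rules  # assumed module path

rules = LlmRules(vector_db="./data/db/store/rules_db", threshold=0.75)  # assumed constructor kwargs
# returns "" for an empty input or a missing FAISS store, otherwise a
# "RULES:" block built from documents above the similarity score threshold
prompt_block = asyncio.run(get_rules(rules, api_key="sk-...", input="user question"))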

@@ -1,79 +1,79 @@
from typing import List,Optional
import os, logging, aiohttp, asyncio
from tqdm.asyncio import tqdm

async def download_files(urls: List[str], destination_folder: str, authorization: str = None):
    tasks = [download_file(file, os.path.join(destination_folder, os.path.basename(file)), authorization=authorization) for file in urls]
    results = await asyncio.gather(*tasks, return_exceptions=False)
    for i, result in enumerate(results):
        if not result:
            raise Exception(f"Download failed for file: {urls[i]}")

async def download_file(url: str, destination: str, chunk_size: int = 8192, authorization: str = None) -> Optional[str]:
    """
    Downloads a file from a given URL to a destination path asynchronously.

    Args:
        url: The URL of the file to download
        destination: The local path where the file should be saved
        chunk_size: Size of chunks to download (default: 8192 bytes)

    Returns:
        str: Path to the downloaded file if successful, None otherwise

    Raises:
        Various exceptions are caught and logged
    """
    try:
        # Ensure the destination directory exists
        os.makedirs(os.path.dirname(os.path.abspath(destination)), exist_ok=True)

        async with aiohttp.ClientSession() as session:
            if authorization:
                headers = {'Authorization': authorization}
                session.headers.update(headers)
            async with session.get(url) as response:
                # Check if the request was successful
                if response.status != 200:
                    logging.error(f"Failed to download file. Status code: {response.status}")
                    return None

                # Get the total file size if available
                total_size = int(response.headers.get('content-length', 0))
                # Open the destination file and write chunks
                with open(destination, 'wb') as f:
                    with tqdm(
                        total=total_size,
                        desc="Downloading",
                        unit='B',
                        unit_scale=True,
                        unit_divisor=1024
                    ) as pbar:
                        async for chunk in response.content.iter_chunked(chunk_size):
                            if chunk:
                                f.write(chunk)
                                pbar.update(len(chunk))

        logging.info(f"File downloaded successfully to {destination}")
        return destination

    except aiohttp.ClientError as e:
        logging.error(f"Network error occurred: {str(e)}")
        return None
    except asyncio.TimeoutError:
        logging.error("Download timed out")
        return None
    except IOError as e:
        logging.error(f"IO error occurred: {str(e)}")
        return None
    except Exception as e:
        logging.error(f"Unexpected error occurred: {str(e)}")
        return None
    finally:
        # If download failed and file was partially created, clean it up
        if os.path.exists(destination) and os.path.getsize(destination) == 0:
            try:
                os.remove(destination)
                logging.info(f"Cleaned up incomplete download: {destination}")
            except OSError:
                pass
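
The downloader above is self-contained (ws_bom_robot_app/llm/utils/download.py in the listing); a short sketch of driving it, with placeholder URLs, folder and Authorization value:

import asyncio
from ws_bom_robot_app.llm.utils.download import download_files

urls = [
    "https://example.com/files/manual.pdf",  # placeholder URLs
    "https://example.com/files/notes.txt",
]
# each URL is saved as ./downloads/<basename>; download_files raises if any
# download_file call returns None (non-200 status, network, timeout or IO error)
asyncio.run(download_files(urls, "./downloads", authorization="Bearer <token>"))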

@@ -1,29 +1,29 @@
import os, sys, json

class HiddenPrints:
    def __enter__(self):
        self._original_stdout = sys.stdout
        self._original_stderr = sys.stderr

        sys.stdout = open(os.devnull, 'w')
        sys.stderr = open(os.devnull, 'w')

    def __exit__(self, exc_type, exc_val, exc_tb):
        sys.stdout.close()
        sys.stderr.close()
        sys.stdout = self._original_stdout
        sys.stderr = self._original_stderr

def printJson(data) -> str:
    return f"{json.dumps(data, indent=2, sort_keys=True)},"

def printSingleJson(data) -> str:
    return f"{json.dumps(data, indent=2, sort_keys=True)}"

def printString(data: str) -> str:
    if data != "":
        return printJson(data)

def printSingleString(data: str) -> str:
    if data != "":
        return printSingleJson(data)
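
A short sketch of the print helpers above (ws_bom_robot_app/llm/utils/print.py in the listing): HiddenPrints temporarily redirects stdout and stderr to os.devnull, while printString/printJson emit indented JSON with a trailing comma for streamed concatenation:

from ws_bom_robot_app.llm.utils.print import HiddenPrints, printString, printSingleJson

with HiddenPrints():
    print("this goes to os.devnull")  # suppressed while the context is active

chunk = printString("waiting...")                               # JSON string plus a trailing ","
payload = printSingleJson({"type": "message", "data": "done"})  # indented JSON, no trailing ","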

@@ -1,137 +1,137 @@
import os, gc, shutil, logging, traceback
import asyncio, aiofiles, aiofiles.os
from fastapi import HTTPException
from fastapi.responses import StreamingResponse
from langchain_core.documents import Document
from ws_bom_robot_app.llm.vector_store.loader.base import Loader
from ws_bom_robot_app.llm.models.api import RulesRequest, KbRequest, VectorDbResponse
from ws_bom_robot_app.llm.vector_store.integration.manager import IntegrationManager
from ws_bom_robot_app.llm.utils.faiss_helper import FaissHelper
from ws_bom_robot_app.config import config
from ws_bom_robot_app.llm.models.kb import load_endpoints
from ws_bom_robot_app.llm.utils.download import download_files

async def _cleanup_directory(directory_path: str):
    if os.path.exists(directory_path):
        await asyncio.to_thread(shutil.rmtree, directory_path)

#@timer
async def rules(rq: RulesRequest) -> VectorDbResponse:
    api_key = rq.api_key()
    _config = rq.config()
    db_name = rq.out_name()
    store_path = os.path.join(_config.robot_data_folder, _config.robot_data_db_folder, _config.robot_data_db_folder_store, db_name)
    try:
        await FaissHelper.create([Document(page_content=rule, metadata={"source": "rules"}) for rule in rq.rules], store_path, api_key) #type: ignore
        db_file_path = shutil.make_archive(os.path.join(_config.robot_data_folder, _config.robot_data_db_folder, _config.robot_data_db_folder_out, db_name), "zip", store_path)
        return VectorDbResponse(file = os.path.basename(db_file_path))
    except Exception as e:
        await _cleanup_directory(store_path)
        return VectorDbResponse(success = False, error = str(e))
    finally:
        gc.collect()

#@atimer
async def kb(rq: KbRequest) -> VectorDbResponse:
    os.environ['MPLCONFIGDIR'] = './tmp/.matplotlib'
    api_key = rq.api_key()
    _config = rq.config()
    db_name = rq.out_name()
    src_path = os.path.join(_config.robot_data_folder, _config.robot_data_db_folder, _config.robot_data_db_folder_src)
    working_path = os.path.join(src_path, db_name)

    if all([not rq.files,not rq.endpoints,not rq.integrations]):
        return VectorDbResponse(success = False, error = "No files, endpoints or integrations provided")
    else:
        await aiofiles.os.makedirs(src_path, exist_ok=True)
        await aiofiles.os.makedirs(working_path, exist_ok=True)

    documents: list[Document] = []
    # Download/copy all files
    if rq.files:
        try:
            loaders = Loader(working_path)
            filter_file_extensions = loaders.managed_file_extensions()
            files_to_download = [file for file in rq.files if not os.path.exists(os.path.join(src_path, os.path.basename(file)))]
            if files_to_download:
                await download_files(
                    [f"{_config.robot_cms_host}/{_config.robot_cms_kb_folder}/{os.path.basename(file)}" for file in files_to_download if any([file.endswith(ext) for ext in filter_file_extensions])],
                    src_path, authorization=_config.robot_cms_auth)
            # copy files to working tmp folder
            for file in rq.files:
                async with aiofiles.open(os.path.join(src_path, os.path.basename(file)), 'rb') as src_file:
                    async with aiofiles.open(os.path.join(working_path, os.path.basename(file)), 'wb') as dest_file:
                        await dest_file.write(await src_file.read())
            #load files
            try:
                documents.extend(await loaders.load())
            except Exception as e:
                tb = traceback.format_exc()
                _error = f"File loader failure: {e} | {tb}"
                logging.warning(_error)
                return VectorDbResponse(success = False, error = _error)
        except Exception as e:
            await _cleanup_directory(working_path)
            return VectorDbResponse(success = False, error = f"Failed to download file {e}")

    if rq.endpoints:
        try:
            documents.extend(await load_endpoints(rq.endpoints, working_path))
        except Exception as e:
            await _cleanup_directory(working_path)
            tb = traceback.format_exc()
            _error = f"Endpoint failure: {e} | {tb}"
            logging.warning(_error)
            return VectorDbResponse(success = False, error = _error)

    if rq.integrations:
        tasks = []
        for integration in rq.integrations:
            tasks.append(
                IntegrationManager
                .get_strategy(integration.type.lower(), working_path, integration.__pydantic_extra__) #type: ignore
                .load()
            )
        try:
            integration_documents = await asyncio.gather(*tasks)
            for docs in integration_documents:
                documents.extend(docs)
        except Exception as e:
            await _cleanup_directory(working_path)
            tb = traceback.format_exc()
            _error = f"Integration failure: {e} | {tb}"
            logging.warning(_error)
            return VectorDbResponse(success=False, error=_error)

    #cleanup
    await _cleanup_directory(working_path)

    if documents and len(documents) > 0:
        try:
            store_path = os.path.join(_config.robot_data_folder, _config.robot_data_db_folder, _config.robot_data_db_folder_store, db_name)
            db_file_path = await aiofiles.os.wrap(shutil.make_archive)(
                os.path.join(_config.robot_data_folder, _config.robot_data_db_folder, _config.robot_data_db_folder_out, db_name),
                "zip",
                await FaissHelper.create(documents, store_path, api_key, return_folder_path=True)
            )
            return VectorDbResponse(file = os.path.basename(db_file_path))
        except Exception as e:
            await _cleanup_directory(store_path)
            return VectorDbResponse(success = False, error = str(e))
        finally:
            del documents
            gc.collect()
    else:
        _error = "No documents found in the knowledgebase folder"
        logging.warning(_error)
        return VectorDbResponse(success = False, error = _error)

async def kb_stream_file(filename: str):
    file_path = os.path.join(config.robot_data_folder, config.robot_data_db_folder, config.robot_data_db_folder_out, filename)
    if not os.path.isfile(file_path):
        raise HTTPException(status_code=404, detail="File not found")
    def iter_file():
        with open(file_path, mode="rb") as file:
            while chunk := file.read(1024*8):
                yield chunk
    return StreamingResponse(iter_file(), media_type="application/octet-stream", headers={"Content-Disposition": f"attachment; filename={filename}"})
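
kb_stream_file at the end of the hunk streams a generated archive back in 8 KB chunks; a minimal standalone FastAPI route using the same pattern (the route and folder below are illustrative, not the package's actual endpoint):

import os
from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse

app = FastAPI()

@app.get("/db/{filename}")  # illustrative route
async def stream_db(filename: str):
    file_path = os.path.join("./data/db/out", filename)  # placeholder output folder
    if not os.path.isfile(file_path):
        raise HTTPException(status_code=404, detail="File not found")
    def iter_file():
        # read and yield the archive in 8 KB chunks, as kb_stream_file does
        with open(file_path, mode="rb") as file:
            while chunk := file.read(1024 * 8):
                yield chunk
    return StreamingResponse(
        iter_file(),
        media_type="application/octet-stream",
        headers={"Content-Disposition": f"attachment; filename={filename}"},
    )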

@@ -35,12 +35,12 @@ class Loader():
     '.eml': LoaderConfig(loader=UnstructuredEmailLoader,kwargs={"strategy":"auto", "process_attachments": False}),
     '.msg': LoaderConfig(loader=UnstructuredEmailLoader,kwargs={"strategy":"auto", "process_attachments": False}),
     '.epub': None,
-    '.md': LoaderConfig(loader=TextLoader),
+    '.md': LoaderConfig(loader=TextLoader, kwargs={"autodetect_encoding": True}),
     '.org': None,
     '.odt': None,
     '.ppt': None,
     '.pptx': LoaderConfig(loader=UnstructuredPowerPointLoader,kwargs={"strategy":"auto"}), #docling issue with WMF https://github.com/DS4SD/docling/issues/594
-    '.txt': LoaderConfig(loader=TextLoader),
+    '.txt': LoaderConfig(loader=TextLoader, kwargs={"autodetect_encoding": True}),
     '.rst': None,
     '.rtf': None,
     '.tsv': None,
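
This is the only hunk shown here whose text actually changes: the .md and .txt entries now pass autodetect_encoding=True to langchain's TextLoader, so knowledge-base files that are not valid UTF-8 (for example latin-1 exports) are retried with detected encodings instead of failing the load. A standalone illustration of that kwarg, with a placeholder file path:

from langchain_community.document_loaders import TextLoader

# without autodetect_encoding (the 0.0.30 configuration) a non-UTF-8 file
# makes load() raise; with it the loader retries using detected encodings
loader = TextLoader("./kb/notes_latin1.txt", autodetect_encoding=True)
docs = loader.load()
print(docs[0].metadata["source"], len(docs[0].page_content))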

@@ -1,25 +1,25 @@
import json
from typing import Optional
from langchain_core.documents import Document
from langchain_community.document_loaders.base import BaseLoader

class JsonLoader(BaseLoader):
    def __init__(self, file_path: str, meta_fields:Optional[list[str]] = [],encoding: Optional[str] = "utf-8"):
        self.file_path = file_path
        self.meta_fields = meta_fields
        self.encoding = encoding

    def load(self) -> list[Document]:
        with open(self.file_path, "r", encoding=self.encoding) as file:
            data = json.load(file)
        _list = data if isinstance(data, list) else [data]
        return [
            Document(
                page_content=json.dumps(item),
                metadata={
                    "source": self.file_path,
                    **{field: item.get(field) for field in self.meta_fields if item.get(field)}
                }
            )
            for item in _list
        ]
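
A short sketch of JsonLoader above (ws_bom_robot_app/llm/vector_store/loader/json_loader.py): each top-level item becomes a Document whose page_content is the item re-serialised as JSON, with any requested meta_fields copied into metadata when present. The module path is assumed to mirror the file path, and the file/field names are illustrative:

from ws_bom_robot_app.llm.vector_store.loader.json_loader import JsonLoader

# products.json may hold a single object or a list of objects
loader = JsonLoader("./kb/products.json", meta_fields=["title", "category"])
docs = loader.load()
for doc in docs:
    # metadata always carries "source"; "title"/"category" only when truthy in the item
    print(doc.metadata, doc.page_content[:80])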