ws_bom_robot_app-0.0.102-py3-none-any.whl → ws_bom_robot_app-0.0.104-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ws_bom_robot_app/llm/agent_context.py +1 -1
- ws_bom_robot_app/llm/agent_description.py +123 -123
- ws_bom_robot_app/llm/agent_handler.py +176 -176
- ws_bom_robot_app/llm/agent_lcel.py +107 -107
- ws_bom_robot_app/llm/defaut_prompt.py +15 -15
- ws_bom_robot_app/llm/feedbacks/feedback_manager.py +66 -66
- ws_bom_robot_app/llm/main.py +159 -159
- ws_bom_robot_app/llm/models/feedback.py +30 -30
- ws_bom_robot_app/llm/nebuly_handler.py +185 -185
- ws_bom_robot_app/llm/tools/tool_builder.py +68 -68
- ws_bom_robot_app/llm/tools/tool_manager.py +343 -343
- ws_bom_robot_app/llm/tools/utils.py +41 -41
- ws_bom_robot_app/llm/utils/agent.py +34 -34
- ws_bom_robot_app/llm/utils/chunker.py +77 -15
- ws_bom_robot_app/llm/utils/cms.py +123 -123
- ws_bom_robot_app/llm/utils/download.py +183 -183
- ws_bom_robot_app/llm/utils/print.py +29 -29
- ws_bom_robot_app/llm/vector_store/db/chroma.py +1 -0
- ws_bom_robot_app/llm/vector_store/generator.py +137 -137
- ws_bom_robot_app/llm/vector_store/integration/shopify.py +143 -143
- ws_bom_robot_app/llm/vector_store/integration/thron.py +236 -236
- ws_bom_robot_app/llm/vector_store/loader/docling.py +3 -2
- ws_bom_robot_app/llm/vector_store/loader/json_loader.py +25 -25
- {ws_bom_robot_app-0.0.102.dist-info → ws_bom_robot_app-0.0.104.dist-info}/METADATA +364 -364
- {ws_bom_robot_app-0.0.102.dist-info → ws_bom_robot_app-0.0.104.dist-info}/RECORD +27 -27
- {ws_bom_robot_app-0.0.102.dist-info → ws_bom_robot_app-0.0.104.dist-info}/WHEEL +0 -0
- {ws_bom_robot_app-0.0.102.dist-info → ws_bom_robot_app-0.0.104.dist-info}/top_level.txt +0 -0
@@ -1,41 +1,41 @@
(This hunk most likely belongs to ws_bom_robot_app/llm/tools/utils.py, the only +41 -41 entry above; the source rendering does not name the file. Every line is removed and re-added verbatim, which usually indicates a line-ending or whitespace-only rewrite. The content on both sides of the hunk:)

import random, os
from langchain_openai import ChatOpenAI
from langchain_core.prompts import PromptTemplate
from ws_bom_robot_app.llm.providers.llm_manager import LlmInterface
from ws_bom_robot_app.llm.utils.print import print_string

def __print_output(data: str) -> str:
    return print_string(data) if os.environ.get("AGENT_HANDLER_FORMATTED") == str(True) else f"{data} "

def getRandomWaitingMessage(waiting_messages: str, traduction: bool = True) -> str:
    if not waiting_messages: return ""
    messages = [msg.strip() for msg in waiting_messages.split(";") if msg.strip()]
    if not messages: return ""
    chosen_message = random.choice(messages) + "\n"
    if not traduction:
        return __print_output(chosen_message)
    return chosen_message

async def translate_text(llm: LlmInterface, language, text: str, callbacks: list) -> str:
    if language == "it":
        return __print_output(text)
    sys_message = """Il tuo compito è di tradurre il testo_da_tradurre nella seguente lingua: \n\n lingua: {language}\n\n testo_da_tradurre: {testo_da_tradurre} \n\nTraduci il testo_da_tradurre nella lingua {language} senza aggiungere altro:"""
    prompt = PromptTemplate.from_template(sys_message)
    chain = prompt | llm.get_llm()
    await chain.ainvoke({"language":language, "testo_da_tradurre": text}, {"callbacks": callbacks})

async def fetch_page(session, url):
    try:
        async with session.get(url, timeout=10, ssl=False) as response:
            if response.status == 200:
                text = await response.text()
                return {"url": url, "html": text}
            else:
                return {"url": url, "html": None}
    except Exception as e:
        return {"url": url, "html": None}

async def extract_content_with_trafilatura(html):
    """Estrae solo il testo principale usando trafilatura"""
    import trafilatura
    return trafilatura.extract(html)
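For orientation, a minimal usage sketch of the helpers in this hunk. The import path follows the file attribution above (an inference, not stated in the rendering), and the URL and waiting messages are illustrative:

import asyncio
import aiohttp

# inferred module path for this hunk; see the attribution note above
from ws_bom_robot_app.llm.tools.utils import fetch_page, getRandomWaitingMessage

async def main():
    # picks one of the ';'-separated messages at random and appends a newline;
    # traduction=False routes it through the module's __print_output formatting
    print(getRandomWaitingMessage("Un attimo...;Sto cercando...", traduction=False))
    async with aiohttp.ClientSession() as session:
        # fetch_page returns {"url": ..., "html": <body, or None on non-200 or any error>}
        page = await fetch_page(session, "https://example.com")
        print(page["url"], page["html"] is not None)

asyncio.run(main())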
@@ -1,34 +1,34 @@
(This hunk most likely belongs to ws_bom_robot_app/llm/utils/agent.py, the only +34 -34 entry above. As in the previous hunk, all lines are removed and re-added verbatim, so this is most plausibly a line-ending or whitespace-only change. The content on both sides:)

import os
from langchain_core.embeddings import Embeddings
from ws_bom_robot_app.llm.models.api import LlmRules
from ws_bom_robot_app.llm.utils.print import HiddenPrints
from ws_bom_robot_app.llm.vector_store.db.manager import VectorDbManager
import warnings

async def get_rules(embeddings: Embeddings, rules: LlmRules, query: str | list) -> str:
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=Warning)
        # check if the input is multimodal and convert it to text
        if isinstance(query, list):
            query = " ".join(obj.get("text", "") for obj in query)
        # check if the input is empty or the rules are not provided
        if any([query=="",rules is None,rules and rules.vector_db == "",rules and not os.path.exists(rules.vector_db)]):
            return ""
        # get the rules from the vector db and return prompt with rules
        rules_prompt = ""
        rules_doc = await VectorDbManager.get_strategy(rules.vector_type).invoke(
            embeddings,
            rules.vector_db,
            query,
            search_type="similarity_score_threshold",
            search_kwargs={
                "score_threshold": rules.threshold,
                "k": 500,
                "fetch_k": 500,
            },
            source = None) #type: ignore
        if len(rules_doc) > 0:
            rules_prompt = "\nFollow this rules: \n RULES: \n"
            for rule_doc in rules_doc:
                rules_prompt += "- " + rule_doc.page_content + "\n"
        return rules_prompt
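A hedged sketch of calling get_rules from this hunk. The embeddings class, database path, and query are illustrative assumptions; the LlmRules field names are taken from the cms.py hunk later in this diff:

import asyncio
from langchain_openai import OpenAIEmbeddings  # illustrative embeddings choice

from ws_bom_robot_app.llm.models.api import LlmRules
# inferred module path for this hunk; see the attribution note above
from ws_bom_robot_app.llm.utils.agent import get_rules

async def main():
    # vector_db must be an existing local path, otherwise get_rules returns ""
    rules = LlmRules(vector_type="faiss", vector_db="./db/rules", threshold=0.7)
    fragment = await get_rules(OpenAIEmbeddings(), rules, "what is the refund policy?")
    print(fragment or "(no matching rules)")

asyncio.run(main())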
@@ -1,20 +1,82 @@
(This hunk corresponds to ws_bom_robot_app/llm/utils/chunker.py, consistent with the +77 -15 entry above, and is the only substantive code change shown in this diff. Note two gaps in the source rendering: the removed import line appears truncated, and the removed body of the old chunk() method is rendered blank, so its content is not recoverable.)

 from langchain_core.documents import Document
-from langchain_text_splitters import
-import logging
+from langchain_text_splitters import RecursiveCharacterTextSplitter

 class DocumentChunker:
-    _MAX_CHUNK_SIZE = 10_000
     @staticmethod
     def chunk(documents: list[Document]) -> list[Document]:
-    [old lines 9-20: the previous chunk() body, blank in the source rendering]
+        return DocumentChunker.chunk_recursive(documents)
+
+    @staticmethod
+    def chunk_recursive(documents: list[Document], chunk_size: int=3_000) -> list[Document]:
+        """
+        Recursively split documents into smaller chunks while preserving metadata.
+
+        This function takes a list of documents and splits them into smaller chunks using
+        RecursiveCharacterTextSplitter. Documents smaller than the chunk size are kept intact,
+        while larger documents are split into multiple chunks with overlapping content.
+
+        Args:
+            documents (list[Document]): A list of Document objects to be chunked.
+            chunk_size (int, optional): The maximum size of each chunk in characters.
+                Defaults to 3,000.
+
+        Returns:
+            list[Document]: A list of Document objects where each document's content is
+                at most chunk_size characters. Each chunk preserves the metadata from
+                its original document.
+
+        Notes:
+            - Chunk overlap is automatically set to 10% of the chunk_size to maintain
+              context between chunks.
+            - Documents smaller than or equal to chunk_size are returned unchanged.
+            - Metadata from the original document is copied to all resulting chunks.
+        """
+        text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=int(chunk_size//10))
+        chunked_documents = []
+        for doc in documents:
+            if len(doc.page_content) <= chunk_size:
+                chunked_documents.append(doc)
+                continue
+            chunks = text_splitter.split_text(doc.page_content)
+            for chunk in chunks:
+                chunked_documents.append(
+                    Document(page_content=chunk, metadata=doc.metadata)
+                )
+        return chunked_documents
+
+    @staticmethod
+    def chunk_token(documents: list[Document], max_tokens: int=1_000) -> list[Document]:
+        """
+        Splits a list of documents into smaller chunks based on token count.
+
+        This function takes a list of Document objects and splits them into smaller chunks
+        using a recursive character text splitter based on tiktoken encoding. Each chunk
+        respects the maximum token limit while maintaining some overlap between consecutive
+        chunks for context preservation.
+
+        Args:
+            documents (list[Document]): A list of Document objects to be chunked. Each Document
+                should have 'page_content' (str) and 'metadata' (dict) attributes.
+            max_tokens (int, optional): The maximum number of tokens allowed per chunk.
+                Defaults to 1,000. The chunk overlap is automatically set to 10% of this value.
+
+        Returns:
+            list[Document]: A list of new Document objects where each document represents a chunk
+                of the original documents. Each chunked Document preserves the metadata from its
+                source document.
+
+        Note:
+            - Uses the "cl100k_base" tiktoken encoding (commonly used for GPT-4 and similar models)
+            - Chunk overlap is set to max_tokens // 10 to maintain context between chunks
+            - Original document metadata is preserved in all generated chunks
+        """
+        text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(encoding_name="cl100k_base", chunk_size=max_tokens, chunk_overlap=max_tokens//10)
+        chunked_documents = []
+        for doc in documents:
+            chunks = text_splitter.split_text(doc.page_content)
+            for chunk in chunks:
+                chunked_documents.append(
+                    Document(page_content=chunk, metadata=doc.metadata)
+                )
+        return chunked_documents
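Because the chunker rewrite is the one substantive change in this release, a short self-contained sketch of the new API (the sample document is illustrative):

from langchain_core.documents import Document
from ws_bom_robot_app.llm.utils.chunker import DocumentChunker

docs = [Document(page_content="word " * 2_000, metadata={"source": "demo.txt"})]

# chunk() now delegates to the character splitter: 3,000-char chunks, 10% overlap
for chunk in DocumentChunker.chunk(docs):
    assert len(chunk.page_content) <= 3_000
    assert chunk.metadata == {"source": "demo.txt"}  # metadata preserved per chunk

# token-based variant: cl100k_base encoding, 1,000-token chunks (needs tiktoken installed)
token_chunks = DocumentChunker.chunk_token(docs, max_tokens=1_000)
print(len(token_chunks))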
@@ -1,123 +1,123 @@
(This hunk most likely belongs to ws_bom_robot_app/llm/utils/cms.py, the only +123 -123 entry above. As with the other unchanged-content hunks, all 123 lines are removed and re-added verbatim: most plausibly a line-ending or whitespace-only change. The content on both sides:)

import logging, aiohttp
from typing import Any, List, Optional
from pydantic import AliasChoices, BaseModel, ConfigDict, Field
from ws_bom_robot_app.llm.models.api import LlmAppTool, LlmRules, StreamRequest
from ws_bom_robot_app.llm.models.kb import LlmKbEndpoint, LlmKbIntegration
from ws_bom_robot_app.util import cache_with_ttl

class CmsAppCredential(BaseModel):
    app_key: str = Field(..., description="The app key for the credential", validation_alias=AliasChoices("appKey","app_key"))
    api_key: str = Field(..., description="The api key for the credential", validation_alias=AliasChoices("apiKey","api_key"))
    model_config = ConfigDict(extra='ignore')
class CmsApp(BaseModel):
    id: str = Field(..., description="Unique identifier for the app")
    name: str = Field(..., description="Name of the app")
    mode: str
    prompt_samples: Optional[List[str]]
    credentials: CmsAppCredential = None
    rq: StreamRequest
    kb: Optional[Any] = None
    model_config = ConfigDict(extra='ignore')

@cache_with_ttl(600) # Cache for 10 minutes
async def get_apps() -> list[CmsApp]:
    import json
    from ws_bom_robot_app.config import config
    class DictObject(object):
        def __init__(self, dict_):
            self.__dict__.update(dict_)
        def __repr__(self):
            return json.dumps(self.__dict__)
        @classmethod
        def from_dict(cls, d):
            return json.loads(json.dumps(d), object_hook=DictObject)
    def __attr(obj, *attrs, default=None):
        for attr in attrs:
            obj = getattr(obj, attr, default)
            if obj is None:
                break
        return obj
    def __to_dict(obj):
        """Converts DictObject to dict recursively"""
        if isinstance(obj, DictObject):
            return {k: __to_dict(v) for k, v in obj.__dict__.items()}
        elif isinstance(obj, list):
            return [__to_dict(item) for item in obj]
        else:
            return obj
    host = config.robot_cms_host
    if host:
        url = f"{host}/api/llmApp?depth=1&pagination=false&locale=it"
        auth = config.robot_cms_auth
        headers = {"Authorization": auth} if auth else {}
        async with aiohttp.ClientSession() as session:
            async with session.get(url, headers=headers) as response:
                if response.status == 200:
                    _apps=[]
                    cms_apps = await response.json()
                    for cms_app in cms_apps:
                        if __attr(cms_app,"isActive",default=True) == True:
                            _cms_app_dict = DictObject.from_dict(cms_app)
                            try:
                                _app: CmsApp = CmsApp(
                                    id=_cms_app_dict.id,
                                    name=_cms_app_dict.name,
                                    mode=_cms_app_dict.mode,
                                    prompt_samples=[__attr(sample,'sampleInputText') or f"{sample.__dict__}" for sample in _cms_app_dict.contents.sampleInputTexts],
                                    credentials=CmsAppCredential(app_key=_cms_app_dict.settings.credentials.appKey,api_key=_cms_app_dict.settings.credentials.apiKey),
                                    rq=StreamRequest(
                                        #thread_id=str(uuid.uuid1()),
                                        messages=[],
                                        secrets={
                                            "apiKey": __attr(_cms_app_dict.settings,'llmConfig','secrets','apiKey', default=''),
                                            "langChainApiKey": __attr(_cms_app_dict.settings,'llmConfig','secrets','langChainApiKey', default=''),
                                            "nebulyApiKey": __attr(_cms_app_dict.settings,'llmConfig','secrets','nebulyApiKey', default=''),
                                        },
                                        system_message=__attr(_cms_app_dict.settings,'llmConfig','prompt','prompt','systemMessage') if __attr(_cms_app_dict.settings,'llmConfig','prompt','prompt','systemMessage') else __attr(_cms_app_dict.settings,'llmConfig','prompt','systemMessage'),
                                        provider= __attr(_cms_app_dict.settings,'llmConfig','provider') or 'openai',
                                        model= __attr(_cms_app_dict.settings,'llmConfig','model') or 'gpt-4o',
                                        temperature=_cms_app_dict.settings.llmConfig.temperature or 0,
                                        app_tools=[LlmAppTool(**tool) for tool in cms_app.get('settings').get('appTools',[])],
                                        rules=LlmRules(
                                            vector_type=__attr(_cms_app_dict.settings,'rules','vectorDbType', default='faiss'),
                                            vector_db=__attr(_cms_app_dict.settings,'rules','vectorDbFile','filename'),
                                            threshold=__attr(_cms_app_dict.settings,'rules','threshold', default=0.7)
                                        ) if __attr(_cms_app_dict.settings,'rules','vectorDbFile','filename') else None,
                                        #fine_tuned_model=__attr(_cms_app_dict.settings,'llmConfig','fineTunedModel'),
                                        lang_chain_tracing= __attr(_cms_app_dict.settings,'llmConfig','langChainTracing', default=False),
                                        lang_chain_project= __attr(_cms_app_dict.settings,'llmConfig','langChainProject', default=''),
                                        output_structure= __to_dict(__attr(_cms_app_dict.settings,'llmConfig','outputStructure')) if __attr(_cms_app_dict.settings,'llmConfig','outputStructure') else None
                                    ))
                            except Exception as e:
                                import traceback
                                ex = traceback.format_exc()
                                logging.error(f"Error creating CmsApp {_cms_app_dict.name} from dict: {e}\n{ex}")
                                continue
                            if _app.rq.app_tools:
                                for tool in _app.rq.app_tools:
                                    _knowledgeBase = tool.knowledgeBase
                                    tool.integrations = [LlmKbIntegration(**item) for item in _knowledgeBase.get('integrations')] if _knowledgeBase.get('integrations') else []
                                    try:
                                        tool.endpoints = [LlmKbEndpoint(**item) for item in _knowledgeBase.get('externalEndpoints')] if _knowledgeBase.get('externalEndpoints') else []
                                    except Exception as e:
                                        logging.error(f"Error parsing endpoints for app {_cms_app_dict.name} tool {tool.name}: {e}")
                                    tool.vector_db = _knowledgeBase.get('vectorDbFile').get('filename') if _knowledgeBase.get('vectorDbFile') else None
                                    tool.vector_type = _knowledgeBase.get('vectorDbType') if _knowledgeBase.get('vectorDbType') else 'faiss'
                                    del tool.knowledgeBase
                            _apps.append(_app)
                    return _apps
                else:
                    logging.error(f"Error fetching cms apps: {response.status}")
    else:
        logging.error("robot_cms_host environment variable is not set.")
    return []


async def get_app_by_id(app_id: str) -> CmsApp | None:
    apps = await get_apps()
    app = next((a for a in apps if a.id == app_id), None)
    if app:
        return app
    else:
        logging.error(f"App with id {app_id} not found.")
        return None
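To close, a sketch of the CMS accessors in the last hunk. It assumes robot_cms_host and robot_cms_auth are set in ws_bom_robot_app.config (as the code above reads them), and the app id is a placeholder:

import asyncio

# inferred module path for this hunk; see the attribution note above
from ws_bom_robot_app.llm.utils.cms import get_apps, get_app_by_id

async def main():
    # get_apps() calls {robot_cms_host}/api/llmApp and caches results for 10 minutes;
    # it returns [] and logs an error when robot_cms_host is not configured
    for app in await get_apps():
        print(app.id, app.name, app.mode)
    # placeholder id: a miss logs an error and returns None
    app = await get_app_by_id("app-id-placeholder")
    print(app)

asyncio.run(main())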