ws-bom-robot-app 0.0.104__py3-none-any.whl → 0.0.106__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
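
In this release pair, the ToolManager class gains two private helpers: __download_sqlite_file, which resolves a SQLite file name or sqlite:/// URI to a local copy and downloads it from the CMS when it is not cached, and __query_database, which answers a natural-language query by running a LangChain SQL agent against that database. document_retriever now requires data_source == "knowledgebase" for the vector-store path (previously a commented-out check) and routes data_source == "database" tools to the new SQL path. The rest of the module is unchanged.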
@@ -1,343 +1,419 @@
- from asyncio import Queue
- import aiohttp, re
- from typing import Optional, Type, Callable
- from ws_bom_robot_app.config import config
- from ws_bom_robot_app.llm.models.api import LlmApp,LlmAppTool
- from ws_bom_robot_app.llm.providers.llm_manager import LlmInterface
- from ws_bom_robot_app.llm.utils.cms import CmsApp, get_app_by_id
- from ws_bom_robot_app.llm.vector_store.db.manager import VectorDbManager
- from ws_bom_robot_app.llm.tools.utils import getRandomWaitingMessage, translate_text
- from ws_bom_robot_app.llm.tools.models.main import NoopInput,DocumentRetrieverInput,ImageGeneratorInput,LlmChainInput,SearchOnlineInput,EmailSenderInput
- from pydantic import BaseModel, ConfigDict
-
- class ToolConfig(BaseModel):
-   function: Callable
-   model: Optional[Type[BaseModel]] = NoopInput
-   model_config = ConfigDict(
-     arbitrary_types_allowed=True
-   )
-
- class ToolManager:
-   """
-   ToolManager is responsible for managing various tools used in the application.
-
-   Attributes:
-     app_tool (LlmAppTool): The application tool configuration.
-     api_key (str): The API key for accessing external services.
-     callbacks (list): A list of callback functions to be executed.
-
-   Methods:
-     document_retriever(query: str): Asynchronously retrieves documents based on the query.
-     image_generator(query: str, language: str = "it"): Asynchronously generates an image based on the query.
-     get_coroutine(): Retrieves the coroutine function based on the tool configuration.
-   """
-
-   def __init__(
-     self,
-     llm: LlmInterface,
-     app_tool: LlmAppTool,
-     callbacks: list,
-     queue: Optional[Queue] = None
-   ):
-     self.llm = llm
-     self.app_tool = app_tool
-     self.callbacks = callbacks
-     self.queue = queue
-
-   async def __extract_documents(self, query: str, app_tool: LlmAppTool):
-     search_type = "similarity"
-     search_kwargs = {"k": 4}
-     if app_tool.search_settings:
-       search_settings = app_tool.search_settings # type: ignore
-       if search_settings.search_type == "similarityScoreThreshold":
-         search_type = "similarity_score_threshold"
-         search_kwargs = {
-           "score_threshold": search_settings.score_threshold_id if search_settings.score_threshold_id else 0.5,
-           "k": search_settings.search_k if search_settings.search_k else 100
-         }
-       elif search_settings.search_type == "mmr":
-         search_type = "mmr"
-         search_kwargs = {"k": search_settings.search_k if search_settings.search_k else 4}
-       elif search_settings.search_type == "default":
-         search_type = "similarity"
-         search_kwargs = {"k": search_settings.search_k if search_settings.search_k else 4}
-       else:
-         search_type = "mixed"
-         search_kwargs = {"k": search_settings.search_k if search_settings.search_k else 4}
-     if self.queue:
-       await self.queue.put(getRandomWaitingMessage(app_tool.waiting_message, traduction=False))
-
-     return await VectorDbManager.get_strategy(app_tool.vector_type).invoke(
-       self.llm.get_embeddings(),
-       app_tool.vector_db,
-       query,
-       search_type,
-       search_kwargs,
-       app_tool=app_tool,
-       llm=self.llm.get_llm(),
-       source=app_tool.function_id,
-     )
-
-   #region functions
-   async def document_retriever(self, query: str) -> list:
-     """
-     Asynchronously retrieves documents based on the provided query using the specified search settings.
-
-     Args:
-       query (str): The search query string.
-
-     Returns:
-       list: A list of retrieved documents based on the search criteria.
-
-     Raises:
-       ValueError: If the configuration for the tool is invalid or the vector database is not found.
-
-     Notes:
-       - The function supports different search types such as "similarity", "similarity_score_threshold", "mmr", and "mixed".
-       - The search settings can be customized through the `app_tool.search_settings` attribute.
-       - If a queue is provided, a waiting message is put into the queue before invoking the search.
-     """
-     if (
-       self.app_tool.type == "function" and self.app_tool.vector_db
-       #and self.settings.get("dataSource") == "knowledgebase"
-     ):
-       return await self.__extract_documents(query, self.app_tool)
-
-   async def image_generator(self, query: str, language: str = "it"):
-     """
-     Asynchronously generates an image based on the query.
-     set OPENAI_API_KEY in your environment variables
-     """
-     from langchain_community.utilities.dalle_image_generator import DallEAPIWrapper
-     model = self.app_tool.model or "dall-e-3"
-     random_waiting_message = getRandomWaitingMessage(self.app_tool.waiting_message, traduction=False)
-     if not language:
-       language = "it"
-     await translate_text(
-       self.llm, language, random_waiting_message, self.callbacks
-     )
-     try:
-       #set os.environ.get("OPENAI_API_KEY")!
-       image_url = DallEAPIWrapper(model=model).run(query) # type: ignore
-       return image_url
-     except Exception as e:
-       return f"Error: {str(e)}"
-
-   async def llm_chain(self, input: str):
-     if self.app_tool.type == "llmChain":
-       from langchain_core.prompts import ChatPromptTemplate
-       from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
-       from pydantic import create_model
-       system_message = self.app_tool.llm_chain_settings.prompt.format(
-         thread_id = self.app_tool.thread_id if self.app_tool.thread_id else "no-thread-id",
-       )
-       context = []
-       if self.app_tool.data_source == "knowledgebase":
-         context = await self.__extract_documents(input, self.app_tool)
-       if len(context) > 0:
-         for doc in context:
-           system_message += f"\n\nContext:\n{doc.metadata.get('source', '')}: {doc.page_content}"
-       # Determine output parser and format based on output type
-       output_type = self.app_tool.llm_chain_settings.outputStructure.get("outputType")
-       is_json_output = output_type == "json"
-
-       if is_json_output:
-         output_format = self.app_tool.llm_chain_settings.outputStructure.get("outputFormat", {})
-         json_schema = create_model('json_schema', **{k: (type(v), ...) for k, v in output_format.items()})
-         output_parser = JsonOutputParser(pydantic_object=json_schema)
-         system_message += "\n\nFormat instructions:\n{format_instructions}".strip()
-       else:
-         output_parser = StrOutputParser()
-       # Create prompt template with or without format instructions
-       base_messages = [
-         ("system", system_message),
-         ("user", "{input}")
-       ]
-       if is_json_output:
-         prompt = ChatPromptTemplate.from_messages(base_messages).partial(
-           format_instructions=output_parser.get_format_instructions()
-         )
-       else:
-         prompt = ChatPromptTemplate.from_messages(base_messages)
-       model = self.app_tool.llm_chain_settings.model
-       self.llm.config.model = model
-       llm = self.llm.get_llm()
-       llm.tags = ["llm_chain"]
-       chain = prompt | llm | output_parser
-       result = await chain.ainvoke({"input": input})
-       return result
-
-   async def proxy_app_chat(self, query: str) -> str | None:
-     from ws_bom_robot_app.llm.models.api import LlmMessage
-     secrets = self.app_tool.secrets_to_dict()
-     app_id = secrets.get("appId")
-     if not app_id:
-       raise ValueError("Tool configuration is invalid. 'appId' is required.")
-     app: CmsApp = await get_app_by_id(app_id)
-     if not app:
-       raise ValueError(f"App with id {app_id} not found.")
-     # message
-     app.rq.messages.append(LlmMessage(role="user", content=query))
-     # tracing
-     if str(secrets.get("disable_tracing", False)).lower() in ['1','true','yes']:
-       app.rq.lang_chain_tracing = False
-       app.rq.lang_chain_project = ''
-       app.rq.secrets['nebulyApiKey'] = ''
-     # http: for debugging purposes
-     if str(secrets.get("use_http", False)).lower() in ['1','true','yes']:
-       import base64
-       url = f"http://localhost:{config.runtime_options().tcp_port}/api/llm/stream/raw"
-       auth = f"Basic {base64.b64encode((config.robot_user + ':' + config.robot_password).encode('utf-8')).decode('utf-8')}"
-       headers = {"Authorization": auth} if auth else {}
-       async with aiohttp.ClientSession() as session:
-         _data = app.rq.model_dump(mode='json',by_alias=True,exclude_unset=True,exclude_none=True, exclude_defaults=True)
-         async with session.post(url, json=_data, headers=headers) as response:
-           if response.status == 200:
-             return await response.text()
-           else:
-             raise ValueError(f"Error fetching chat response: {response.status}")
-       return None
-     else: # default
-       try:
-         from ws_bom_robot_app.llm.main import stream
-         import json
-         chunks = []
-         async for chunk in stream(rq=app.rq, ctx=None, formatted=False):
-           chunks.append(chunk)
-         rs = ''.join(chunks) if chunks else None
-
-         # if the app has output_structure, parse the JSON and return dict
-         if rs and app.rq.output_structure:
-           try:
-             cleaned_rs = re.sub(r'^```(?:json)?\s*\n?', '', rs.strip())
-             cleaned_rs = re.sub(r'\n?```\s*$', '', cleaned_rs)
-             return json.loads(cleaned_rs)
-           except json.JSONDecodeError:
-             print(f"[!] Failed to parse JSON output from proxy_app_chat: {rs}")
-             return rs
-         return rs
-       except Exception as e:
-         print(f"[!] Error in proxy_app_chat: {e}")
-         return None
-
-   async def proxy_app_tool(self) -> None:
-     return None
-
-   async def _fetch_urls(self, urls: list[str]) -> list[dict]:
-     import aiohttp, asyncio
-     from ws_bom_robot_app.llm.tools.utils import fetch_page, extract_content_with_trafilatura
-     if not urls:
-       return []
-     async with aiohttp.ClientSession() as session:
-       tasks = [fetch_page(session, url) for url in urls]
-       responses = await asyncio.gather(*tasks, return_exceptions=True)
-       final_results = []
-       for item in responses:
-         if isinstance(item, Exception):
-           continue
-         url = item["url"]
-         html = item["html"]
-         if html:
-           content = await extract_content_with_trafilatura(html)
-           if content:
-             final_results.append({"url": url, "content": content})
-           else:
-             final_results.append({"url": url, "content": "No content found"})
-         else:
-           final_results.append({"url": url, "content": "Page not found"})
-       return final_results
-
-   async def search_online(self, query: str) -> list[dict]:
-     from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
-     # DuckDuckGo wrapper
-     search = DuckDuckGoSearchAPIWrapper(max_results=10)
-     try:
-       raw_results = search.results(query, max_results=10)
-     except Exception as e:
-       return f"[!] Search error: {e}"
-     urls = [r["link"] for r in raw_results]
-     return await self._fetch_urls(urls)
-
-   async def search_online_google(self, query: str) -> list[dict]:
-     from langchain_google_community import GoogleSearchAPIWrapper
-     secrets = self.app_tool.secrets_to_dict()
-     search_type = secrets.get("searchType")
-     if search_type:
-       search_kwargs = {"searchType" : search_type}
-     search = GoogleSearchAPIWrapper(
-       google_api_key=secrets.get("GOOGLE_API_KEY"),
-       google_cse_id=secrets.get("GOOGLE_CSE_ID"),
-     )
-     if search_type:
-       raw_results = search.results(query=query,
-                                    num_results=secrets.get("num_results", 5),
-                                    search_params=search_kwargs)
-       return raw_results
-     raw_results = search.results(
-       query=query,
-       num_results=secrets.get("num_results", 5)
-     )
-     urls = [r["link"] for r in raw_results]
-     return await self._fetch_urls(urls)
-
-   async def send_email(self, email_subject: str, body: str, to_email:str):
-     import smtplib
-     from email.mime.multipart import MIMEMultipart
-     from email.mime.text import MIMEText
-     secrets = self.app_tool.secrets
-     secrets = {item["secretId"]: item["secretValue"] for item in secrets}
-     import urllib.parse as urlparse
-     url_preview = secrets.get("url_preview", "")
-     if url_preview and url_preview != "":
-       message_thread = "You can view the chat at this address: " + urlparse.urljoin(url_preview, f"?llmThreadId={self.app_tool.thread_id}")
-       body = body.replace("##url_preview##", message_thread)
-     # Email configuration
-     smtp_server = secrets.get("smtp_server")
-     smtp_port = secrets.get("smtp_port")
-     smtp_user = secrets.get("smtp_user")
-     smtp_password = secrets.get("smtp_password")
-     from_email = secrets.get("from_email")
-     if not to_email or to_email == "":
-       return "No recipient email provided"
-     if not email_subject or email_subject == "":
-       return "No email subject provided"
-     # Create the email content
-     msg = MIMEMultipart()
-     msg['From'] = from_email
-     msg['To'] = to_email
-     msg['Subject'] = email_subject
-
-     # Create the email body
-     msg.attach(MIMEText(body, 'plain'))
-
-     # Send the email
-     try:
-       with smtplib.SMTP(smtp_server, smtp_port) as server:
-         # Use authentication and SSL only if password is provided
-         if smtp_password:
-           server.starttls()
-           server.login(smtp_user, smtp_password)
-         server.send_message(msg)
-     except Exception as e:
-       return f"Failed to send email: {str(e)}"
-     return "Email sent successfully"
-
-   #endregion
-
-   #class variables (static)
-   _list: dict[str,ToolConfig] = {
-     f"{document_retriever.__name__}": ToolConfig(function=document_retriever, model=DocumentRetrieverInput),
-     f"{image_generator.__name__}": ToolConfig(function=image_generator, model=ImageGeneratorInput),
-     f"{llm_chain.__name__}": ToolConfig(function=llm_chain, model=LlmChainInput),
-     f"{search_online.__name__}": ToolConfig(function=search_online, model=SearchOnlineInput),
-     f"{search_online_google.__name__}": ToolConfig(function=search_online_google, model=SearchOnlineInput),
-     f"{send_email.__name__}": ToolConfig(function=send_email, model=EmailSenderInput),
-     f"{proxy_app_chat.__name__}": ToolConfig(function=proxy_app_chat, model=DocumentRetrieverInput),
-     f"{proxy_app_tool.__name__}": ToolConfig(function=proxy_app_tool, model=NoopInput),
-
-   }
-
-   #instance methods
-   def get_coroutine(self):
-     tool_cfg = self._list.get(self.app_tool.function_name)
-     return getattr(self, tool_cfg.function.__name__) # type: ignore
+ from asyncio import Queue
+ import aiohttp, re
+ from typing import Optional, Type, Callable
+ from ws_bom_robot_app.config import config
+ from ws_bom_robot_app.llm.models.api import LlmApp,LlmAppTool
+ from ws_bom_robot_app.llm.providers.llm_manager import LlmInterface
+ from ws_bom_robot_app.llm.utils.cms import CmsApp, get_app_by_id
+ from ws_bom_robot_app.llm.vector_store.db.manager import VectorDbManager
+ from ws_bom_robot_app.llm.tools.utils import getRandomWaitingMessage, translate_text
+ from ws_bom_robot_app.llm.tools.models.main import NoopInput,DocumentRetrieverInput,ImageGeneratorInput,LlmChainInput,SearchOnlineInput,EmailSenderInput
+ from pydantic import BaseModel, ConfigDict
+
+ class ToolConfig(BaseModel):
+   function: Callable
+   model: Optional[Type[BaseModel]] = NoopInput
+   model_config = ConfigDict(
+     arbitrary_types_allowed=True
+   )
+
+ class ToolManager:
+   """
+   ToolManager is responsible for managing various tools used in the application.
+
+   Attributes:
+     app_tool (LlmAppTool): The application tool configuration.
+     api_key (str): The API key for accessing external services.
+     callbacks (list): A list of callback functions to be executed.
+
+   Methods:
+     document_retriever(query: str): Asynchronously retrieves documents based on the query.
+     image_generator(query: str, language: str = "it"): Asynchronously generates an image based on the query.
+     get_coroutine(): Retrieves the coroutine function based on the tool configuration.
+   """
+
+   def __init__(
+     self,
+     llm: LlmInterface,
+     app_tool: LlmAppTool,
+     callbacks: list,
+     queue: Optional[Queue] = None
+   ):
+     self.llm = llm
+     self.app_tool = app_tool
+     self.callbacks = callbacks
+     self.queue = queue
+
+   async def __extract_documents(self, query: str, app_tool: LlmAppTool):
+     search_type = "similarity"
+     search_kwargs = {"k": 4}
+     if app_tool.search_settings:
+       search_settings = app_tool.search_settings # type: ignore
+       if search_settings.search_type == "similarityScoreThreshold":
+         search_type = "similarity_score_threshold"
+         search_kwargs = {
+           "score_threshold": search_settings.score_threshold_id if search_settings.score_threshold_id else 0.5,
+           "k": search_settings.search_k if search_settings.search_k else 100
+         }
+       elif search_settings.search_type == "mmr":
+         search_type = "mmr"
+         search_kwargs = {"k": search_settings.search_k if search_settings.search_k else 4}
+       elif search_settings.search_type == "default":
+         search_type = "similarity"
+         search_kwargs = {"k": search_settings.search_k if search_settings.search_k else 4}
+       else:
+         search_type = "mixed"
+         search_kwargs = {"k": search_settings.search_k if search_settings.search_k else 4}
+     if self.queue:
+       await self.queue.put(getRandomWaitingMessage(app_tool.waiting_message, traduction=False))
+
+     return await VectorDbManager.get_strategy(app_tool.vector_type).invoke(
+       self.llm.get_embeddings(),
+       app_tool.vector_db,
+       query,
+       search_type,
+       search_kwargs,
+       app_tool=app_tool,
+       llm=self.llm.get_llm(),
+       source=app_tool.function_id,
+     )
+
+   async def __download_sqlite_file(self, db_uri: str) -> str:
+     """
+     Downloads the SQLite file from the CMS if needed and returns the local path.
+     Uses the same logic as the Sitemap integration.
+
+     Args:
+       db_uri: database URI or SQLite file name
+
+     Returns:
+       str: local database URI (sqlite:///path/to/file.db)
+     """
+     import os
+     from ws_bom_robot_app.config import config
+     from ws_bom_robot_app.llm.utils.download import download_file
+
+     if not db_uri.endswith('.db') and not db_uri.endswith('.sqlite') and not db_uri.endswith('.sqlite3'):
+       return db_uri
+
+     if db_uri.startswith('sqlite:///'):
+       file_path = db_uri.replace('sqlite:///', '')
+       if os.path.isabs(file_path) and os.path.exists(file_path):
+         return db_uri
+       filename = os.path.basename(file_path)
+     else:
+       filename = db_uri
+
+     db_folder = os.path.join(config.robot_data_folder, 'db')
+     os.makedirs(db_folder, exist_ok=True)
+
+     local_db_path = os.path.join(db_folder, filename)
+
+     if os.path.exists(local_db_path):
+       return f"sqlite:///{local_db_path}"
+
+     cms_file_url = f"{config.robot_cms_host}/{config.robot_cms_kb_folder}/{filename}"
+     auth = config.robot_cms_auth
+
+     try:
+       result = await download_file(cms_file_url, local_db_path, authorization=auth)
+       if result:
+         return f"sqlite:///{local_db_path}"
+       else:
+         raise ValueError(f"SQLite file {filename} not found in the CMS")
+     except Exception as e:
+       raise ValueError(f"Error downloading SQLite file {filename}: {str(e)}")
+
+   async def __query_database(self, query: str, app_tool: LlmAppTool):
+     from langchain_community.agent_toolkits.sql.base import create_sql_agent
+     from langchain_community.utilities import SQLDatabase
+
+     secrets = app_tool.secrets_to_dict()
+
+     db_uri = app_tool.db_settings.connection_string
+     additional_prompt = app_tool.db_settings.additionalPrompt
+     if not db_uri:
+       raise ValueError("Database URI not found in tool db settings")
+
+     db_uri = await self.__download_sqlite_file(db_uri)
+
+     db = SQLDatabase.from_uri(db_uri)
+     llm = self.llm.get_llm()
+
+     agent = create_sql_agent(
+       llm=llm,
+       db=db,
+       agent_type="tool-calling",
+       suffix=additional_prompt if additional_prompt else None,
+     )
+
+     result = await agent.ainvoke({"input": query}, config={"callbacks": []})
+     if result and "output" in result:
+       return result["output"]
+     return None
+
+   #region functions
+   async def document_retriever(self, query: str) -> list:
+     """
+     Asynchronously retrieves documents based on the provided query using the specified search settings.
+
+     Args:
+       query (str): The search query string.
+
+     Returns:
+       list: A list of retrieved documents based on the search criteria.
+
+     Raises:
+       ValueError: If the configuration for the tool is invalid or the vector database is not found.
+
+     Notes:
+       - The function supports different search types such as "similarity", "similarity_score_threshold", "mmr", and "mixed".
+       - The search settings can be customized through the `app_tool.search_settings` attribute.
+       - If a queue is provided, a waiting message is put into the queue before invoking the search.
+     """
+     if (
+       self.app_tool.type == "function" and self.app_tool.vector_db
+       and self.app_tool.data_source == "knowledgebase"
+     ):
+       return await self.__extract_documents(query, self.app_tool)
+     elif self.app_tool.type == "function" and self.app_tool.data_source == "database":
+       return await self.__query_database(query, self.app_tool)
+
+   async def image_generator(self, query: str, language: str = "it"):
+     """
+     Asynchronously generates an image based on the query.
+     set OPENAI_API_KEY in your environment variables
+     """
+     from langchain_community.utilities.dalle_image_generator import DallEAPIWrapper
+     model = self.app_tool.model or "dall-e-3"
+     random_waiting_message = getRandomWaitingMessage(self.app_tool.waiting_message, traduction=False)
+     if not language:
+       language = "it"
+     await translate_text(
+       self.llm, language, random_waiting_message, self.callbacks
+     )
+     try:
+       #set os.environ.get("OPENAI_API_KEY")!
+       image_url = DallEAPIWrapper(model=model).run(query) # type: ignore
+       return image_url
+     except Exception as e:
+       return f"Error: {str(e)}"
+
+   async def llm_chain(self, input: str):
+     if self.app_tool.type == "llmChain":
+       from langchain_core.prompts import ChatPromptTemplate
+       from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
+       from pydantic import create_model
+       system_message = self.app_tool.llm_chain_settings.prompt.format(
+         thread_id = self.app_tool.thread_id if self.app_tool.thread_id else "no-thread-id",
+       )
+       context = []
+       if self.app_tool.data_source == "knowledgebase":
+         context = await self.__extract_documents(input, self.app_tool)
+       if len(context) > 0:
+         for doc in context:
+           system_message += f"\n\nContext:\n{doc.metadata.get('source', '')}: {doc.page_content}"
+       # Determine output parser and format based on output type
+       output_type = self.app_tool.llm_chain_settings.outputStructure.get("outputType")
+       is_json_output = output_type == "json"
+
+       if is_json_output:
+         output_format = self.app_tool.llm_chain_settings.outputStructure.get("outputFormat", {})
+         json_schema = create_model('json_schema', **{k: (type(v), ...) for k, v in output_format.items()})
+         output_parser = JsonOutputParser(pydantic_object=json_schema)
+         system_message += "\n\nFormat instructions:\n{format_instructions}".strip()
+       else:
+         output_parser = StrOutputParser()
+       # Create prompt template with or without format instructions
+       base_messages = [
+         ("system", system_message),
+         ("user", "{input}")
+       ]
+       if is_json_output:
+         prompt = ChatPromptTemplate.from_messages(base_messages).partial(
+           format_instructions=output_parser.get_format_instructions()
+         )
+       else:
+         prompt = ChatPromptTemplate.from_messages(base_messages)
+       model = self.app_tool.llm_chain_settings.model
+       self.llm.config.model = model
+       llm = self.llm.get_llm()
+       llm.tags = ["llm_chain"]
+       chain = prompt | llm | output_parser
+       result = await chain.ainvoke({"input": input})
+       return result
+
+   async def proxy_app_chat(self, query: str) -> str | None:
+     from ws_bom_robot_app.llm.models.api import LlmMessage
+     secrets = self.app_tool.secrets_to_dict()
+     app_id = secrets.get("appId")
+     if not app_id:
+       raise ValueError("Tool configuration is invalid. 'appId' is required.")
+     app: CmsApp = await get_app_by_id(app_id)
+     if not app:
+       raise ValueError(f"App with id {app_id} not found.")
+     # message
+     app.rq.messages.append(LlmMessage(role="user", content=query))
+     # tracing
+     if str(secrets.get("disable_tracing", False)).lower() in ['1','true','yes']:
+       app.rq.lang_chain_tracing = False
+       app.rq.lang_chain_project = ''
+       app.rq.secrets['nebulyApiKey'] = ''
+     # http: for debugging purposes
+     if str(secrets.get("use_http", False)).lower() in ['1','true','yes']:
+       import base64
+       url = f"http://localhost:{config.runtime_options().tcp_port}/api/llm/stream/raw"
+       auth = f"Basic {base64.b64encode((config.robot_user + ':' + config.robot_password).encode('utf-8')).decode('utf-8')}"
+       headers = {"Authorization": auth} if auth else {}
+       async with aiohttp.ClientSession() as session:
+         _data = app.rq.model_dump(mode='json',by_alias=True,exclude_unset=True,exclude_none=True, exclude_defaults=True)
+         async with session.post(url, json=_data, headers=headers) as response:
+           if response.status == 200:
+             return await response.text()
+           else:
+             raise ValueError(f"Error fetching chat response: {response.status}")
+       return None
+     else: # default
+       try:
+         from ws_bom_robot_app.llm.main import stream
+         import json
+         chunks = []
+         async for chunk in stream(rq=app.rq, ctx=None, formatted=False):
+           chunks.append(chunk)
+         rs = ''.join(chunks) if chunks else None
+
+         # if the app has output_structure, parse the JSON and return dict
+         if rs and app.rq.output_structure:
+           try:
+             cleaned_rs = re.sub(r'^```(?:json)?\s*\n?', '', rs.strip())
+             cleaned_rs = re.sub(r'\n?```\s*$', '', cleaned_rs)
+             return json.loads(cleaned_rs)
+           except json.JSONDecodeError:
+             print(f"[!] Failed to parse JSON output from proxy_app_chat: {rs}")
+             return rs
+         return rs
+       except Exception as e:
+         print(f"[!] Error in proxy_app_chat: {e}")
+         return None
+
+   async def proxy_app_tool(self) -> None:
+     return None
+
+   async def _fetch_urls(self, urls: list[str]) -> list[dict]:
+     import aiohttp, asyncio
+     from ws_bom_robot_app.llm.tools.utils import fetch_page, extract_content_with_trafilatura
+     if not urls:
+       return []
+     async with aiohttp.ClientSession() as session:
+       tasks = [fetch_page(session, url) for url in urls]
+       responses = await asyncio.gather(*tasks, return_exceptions=True)
+       final_results = []
+       for item in responses:
+         if isinstance(item, Exception):
+           continue
+         url = item["url"]
+         html = item["html"]
+         if html:
+           content = await extract_content_with_trafilatura(html)
+           if content:
+             final_results.append({"url": url, "content": content})
+           else:
+             final_results.append({"url": url, "content": "No content found"})
+         else:
+           final_results.append({"url": url, "content": "Page not found"})
+       return final_results
+
+   async def search_online(self, query: str) -> list[dict]:
+     from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
+     # DuckDuckGo wrapper
+     search = DuckDuckGoSearchAPIWrapper(max_results=10)
+     try:
+       raw_results = search.results(query, max_results=10)
+     except Exception as e:
+       return f"[!] Search error: {e}"
+     urls = [r["link"] for r in raw_results]
+     return await self._fetch_urls(urls)
+
+   async def search_online_google(self, query: str) -> list[dict]:
+     from langchain_google_community import GoogleSearchAPIWrapper
+     secrets = self.app_tool.secrets_to_dict()
+     search_type = secrets.get("searchType")
+     if search_type:
+       search_kwargs = {"searchType" : search_type}
+     search = GoogleSearchAPIWrapper(
+       google_api_key=secrets.get("GOOGLE_API_KEY"),
+       google_cse_id=secrets.get("GOOGLE_CSE_ID"),
+     )
+     if search_type:
+       raw_results = search.results(query=query,
+                                    num_results=secrets.get("num_results", 5),
+                                    search_params=search_kwargs)
+       return raw_results
+     raw_results = search.results(
+       query=query,
+       num_results=secrets.get("num_results", 5)
+     )
+     urls = [r["link"] for r in raw_results]
+     return await self._fetch_urls(urls)
+
+   async def send_email(self, email_subject: str, body: str, to_email:str):
+     import smtplib
+     from email.mime.multipart import MIMEMultipart
+     from email.mime.text import MIMEText
+     secrets = self.app_tool.secrets
+     secrets = {item["secretId"]: item["secretValue"] for item in secrets}
+     import urllib.parse as urlparse
+     url_preview = secrets.get("url_preview", "")
+     if url_preview and url_preview != "":
+       message_thread = "You can view the chat at this address: " + urlparse.urljoin(url_preview, f"?llmThreadId={self.app_tool.thread_id}")
+       body = body.replace("##url_preview##", message_thread)
+     # Email configuration
+     smtp_server = secrets.get("smtp_server")
+     smtp_port = secrets.get("smtp_port")
+     smtp_user = secrets.get("smtp_user")
+     smtp_password = secrets.get("smtp_password")
+     from_email = secrets.get("from_email")
+     if not to_email or to_email == "":
+       return "No recipient email provided"
+     if not email_subject or email_subject == "":
+       return "No email subject provided"
+     # Create the email content
+     msg = MIMEMultipart()
+     msg['From'] = from_email
+     msg['To'] = to_email
+     msg['Subject'] = email_subject
+
+     # Create the email body
+     msg.attach(MIMEText(body, 'plain'))
+
+     # Send the email
+     try:
+       with smtplib.SMTP(smtp_server, smtp_port) as server:
+         # Use authentication and SSL only if password is provided
+         if smtp_password:
+           server.starttls()
+           server.login(smtp_user, smtp_password)
+         server.send_message(msg)
+     except Exception as e:
+       return f"Failed to send email: {str(e)}"
+     return "Email sent successfully"
+
+   #endregion
+
+   #class variables (static)
+   _list: dict[str,ToolConfig] = {
+     f"{document_retriever.__name__}": ToolConfig(function=document_retriever, model=DocumentRetrieverInput),
+     f"{image_generator.__name__}": ToolConfig(function=image_generator, model=ImageGeneratorInput),
+     f"{llm_chain.__name__}": ToolConfig(function=llm_chain, model=LlmChainInput),
+     f"{search_online.__name__}": ToolConfig(function=search_online, model=SearchOnlineInput),
+     f"{search_online_google.__name__}": ToolConfig(function=search_online_google, model=SearchOnlineInput),
+     f"{send_email.__name__}": ToolConfig(function=send_email, model=EmailSenderInput),
+     f"{proxy_app_chat.__name__}": ToolConfig(function=proxy_app_chat, model=DocumentRetrieverInput),
+     f"{proxy_app_tool.__name__}": ToolConfig(function=proxy_app_tool, model=NoopInput),
+
+   }
+
+   #instance methods
+   def get_coroutine(self):
+     tool_cfg = self._list.get(self.app_tool.function_name)
+     return getattr(self, tool_cfg.function.__name__) # type: ignore
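
Two of the changed code paths can be exercised in isolation. First, the fence-stripping step that proxy_app_chat applies before json.loads when the target app declares an output_structure; this is a minimal standalone sketch reusing the exact regular expressions from the diff, with a hypothetical fenced model output as input:

    import json, re

    raw = '```json\n{"answer": 42}\n```'  # hypothetical fenced model output
    cleaned = re.sub(r'^```(?:json)?\s*\n?', '', raw.strip())  # drop the opening ``` / ```json fence
    cleaned = re.sub(r'\n?```\s*$', '', cleaned)  # drop the closing fence
    print(json.loads(cleaned))  # -> {'answer': 42}

Second, the schema trick llm_chain uses to build a JSON parser from outputStructure: each key of outputFormat becomes a required field whose type is the type of its sample value. A minimal sketch, assuming only pydantic and langchain-core are installed; the outputFormat payload shown is hypothetical:

    from pydantic import create_model
    from langchain_core.output_parsers import JsonOutputParser

    output_format = {"title": "a string", "score": 0.5}  # hypothetical outputFormat value
    json_schema = create_model('json_schema', **{k: (type(v), ...) for k, v in output_format.items()})
    parser = JsonOutputParser(pydantic_object=json_schema)
    print(parser.get_format_instructions())  # the text appended to the system prompt

As for the new SQLite helper, __download_sqlite_file only intervenes for URIs ending in .db, .sqlite or .sqlite3: an absolute sqlite:/// path that already exists on disk is returned unchanged, while a bare file name is cached under <robot_data_folder>/db and fetched from <robot_cms_host>/<robot_cms_kb_folder>/<filename> on first use.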