ws-bom-robot-app 0.0.58__py3-none-any.whl → 0.0.60__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,3 +9,5 @@ class ImageGeneratorInput(BaseModel):
9
9
  language: str = Field(description="Language of the query. Default is 'it'", default="it")
10
10
  class LlmChainInput(BaseModel):
11
11
  input: str = Field(description="Input to the LLM chain")
12
# Argument schema shared by the "search_online" and "search_online_google"
# tools: a single query string.
# Documented with comments rather than a class docstring because pydantic
# copies a model docstring into the generated JSON schema description.
class SearchOnlineInput(BaseModel):
    query: str = Field(
        description="The search query string",
    )
@@ -4,7 +4,7 @@ from ws_bom_robot_app.llm.models.api import LlmAppTool
4
4
  from ws_bom_robot_app.llm.providers.llm_manager import LlmInterface
5
5
  from ws_bom_robot_app.llm.vector_store.db.manager import VectorDbManager
6
6
  from ws_bom_robot_app.llm.tools.utils import getRandomWaitingMessage, translate_text
7
- from ws_bom_robot_app.llm.tools.models.main import NoopInput,DocumentRetrieverInput,ImageGeneratorInput,LlmChainInput
7
+ from ws_bom_robot_app.llm.tools.models.main import NoopInput,DocumentRetrieverInput,ImageGeneratorInput,LlmChainInput,SearchOnlineInput
8
8
  from pydantic import BaseModel, ConfigDict
9
9
 
10
10
  class ToolConfig(BaseModel):
@@ -143,6 +143,74 @@ class ToolManager:
143
143
  return result
144
144
 
145
145
 
146
async def search_online(self, query: str):
    """Search DuckDuckGo for ``query`` and return the main text of each hit.

    Args:
        query: The search query string.

    Returns:
        A list of ``{"url": ..., "content": ...}`` dicts, one per search
        result. ``content`` is the text extracted from the page,
        ``"No content found"`` when extraction yields nothing, or
        ``"Page not found"`` when the page HTML could not be fetched.
        An empty list is returned when the search itself fails.
    """
    from ws_bom_robot_app.llm.tools.utils import fetch_page, extract_content_with_trafilatura
    from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
    import aiohttp
    import asyncio

    search = DuckDuckGoSearchAPIWrapper(max_results=10)
    try:
        raw_results = search.results(query, max_results=10)
    except Exception as e:
        print(f"[!] Errore ricerca: {e}")
        # Without this early return the code below would read the unbound
        # name `raw_results` and crash with UnboundLocalError.
        return []

    urls = [r["link"] for r in raw_results]
    # Download every result page concurrently over one shared session.
    async with aiohttp.ClientSession() as session:
        responses = await asyncio.gather(
            *(fetch_page(session, url) for url in urls)
        )

    final_results = []
    for item in responses:
        url = item["url"]
        html = item["html"]
        if not html:
            final_results.append({"url": url, "content": "Page not found"})
            continue
        content = await extract_content_with_trafilatura(html)
        final_results.append(
            {"url": url, "content": content if content else "No content found"}
        )
    return final_results
173
+
174
async def search_online_google(self, query: str):
    """Search Google (Programmable Search) for ``query``.

    Configuration is read from ``self.app_tool.secrets``, an iterable of
    mappings with ``secretId`` / ``secretValue`` entries. The ids used here
    are ``GOOGLE_API_KEY``, ``GOOGLE_CSE_ID``, ``searchType`` and
    ``num_results`` (defaults to 5 when absent).

    Args:
        query: The search query string.

    Returns:
        When a ``searchType`` secret is set, the wrapper's raw results are
        returned unchanged. Otherwise a list of
        ``{"url": ..., "content": ..., "type": "web"}`` dicts, where
        ``content`` is the extracted page text, ``"No content found"`` when
        extraction yields nothing, or ``"Page not found"`` when the page
        HTML could not be fetched.
    """
    from langchain_google_community import GoogleSearchAPIWrapper
    from ws_bom_robot_app.llm.tools.utils import fetch_page, extract_content_with_trafilatura
    import aiohttp, asyncio

    # Flatten the secret entries into a plain id -> value lookup.
    settings = {
        entry.get("secretId"): entry.get("secretValue")
        for entry in self.app_tool.secrets
    }
    search_type = settings.get("searchType")
    # NOTE(review): the value is forwarded as stored; if secrets are kept as
    # strings this is not an int - confirm the wrapper accepts that.
    wanted = settings.get("num_results", 5)

    engine = GoogleSearchAPIWrapper(
        google_api_key=settings.get("GOOGLE_API_KEY"),
        google_cse_id=settings.get("GOOGLE_CSE_ID"),
    )

    if search_type:
        # Typed searches are handed back as-is, without fetching the pages.
        return engine.results(
            query=query,
            num_results=wanted,
            search_params={"searchType": search_type},
        )

    hits = engine.results(query=query, num_results=wanted)
    links = [hit["link"] for hit in hits]
    async with aiohttp.ClientSession() as session:
        pages = await asyncio.gather(*[fetch_page(session, link) for link in links])

    collected = []
    for page in pages:
        link = page["url"]
        markup = page["html"]
        if markup:
            text = await extract_content_with_trafilatura(markup) or "No content found"
        else:
            text = "Page not found"
        collected.append({"url": link, "content": text, "type": "web"})
    return collected
146
214
  #endregion
147
215
 
148
216
  #class variables (static)
@@ -150,6 +218,8 @@ class ToolManager:
150
218
  "document_retriever": ToolConfig(function=document_retriever, model=DocumentRetrieverInput),
151
219
  "image_generator": ToolConfig(function=image_generator, model=ImageGeneratorInput),
152
220
  "llm_chain": ToolConfig(function=llm_chain, model=LlmChainInput),
221
+ "search_online": ToolConfig(function=search_online, model=SearchOnlineInput),
222
+ "search_online_google": ToolConfig(function=search_online_google, model=SearchOnlineInput),
153
223
  }
154
224
 
155
225
  #instance methods
@@ -23,3 +23,19 @@ async def translate_text(llm: LlmInterface, language, text: str, callbacks: list
23
23
  prompt = PromptTemplate.from_template(sys_message)
24
24
  chain = prompt | llm.get_llm()
25
25
  await chain.ainvoke({"language":language, "testo_da_tradurre": text}, {"callbacks": callbacks})
26
+
27
async def fetch_page(session, url):
    """Fetch ``url`` through ``session`` and return its body text.

    Args:
        session: Object whose ``get(url, timeout=..., ssl=...)`` returns an
            async context manager yielding a response with ``status`` and
            an awaitable ``text()``.
        url: Address of the page to download.

    Returns:
        ``{"url": url, "html": <text>}`` when the response status is 200,
        otherwise ``{"url": url, "html": None}``. Any exception raised while
        fetching is swallowed and reported as ``html`` being ``None``.
    """
    body = None
    try:
        # NOTE(review): with an aiohttp session, ssl=False skips TLS
        # certificate verification - confirm this is intentional.
        async with session.get(url, timeout=10, ssl=False) as resp:
            if resp.status == 200:
                body = await resp.text()
    except Exception:
        # Best effort: a failed download is reported as a missing page.
        body = None
    return {"url": url, "html": body}
37
+
38
async def extract_content_with_trafilatura(html):
    """Return the result of ``trafilatura.extract`` for the given HTML.

    Intended to keep only the main text of a page.
    """
    # Imported lazily so trafilatura is only loaded when extraction is used.
    from trafilatura import extract as extract_main_text

    return extract_main_text(html)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ws_bom_robot_app
3
- Version: 0.0.58
3
+ Version: 0.0.60
4
4
  Summary: A FastAPI application serving ws bom/robot/llm platform ai.
5
5
  Home-page: https://github.com/websolutespa/bom
6
6
  Author: Websolute Spa
@@ -17,6 +17,7 @@ Requires-Dist: pydantic==2.10.6
17
17
  Requires-Dist: pydantic-settings==2.7.1
18
18
  Requires-Dist: fastapi[standard]==0.115.8
19
19
  Requires-Dist: chevron==0.14.0
20
+ Requires-Dist: trafilatura==2.0.0
20
21
  Requires-Dist: langchain==0.3.25
21
22
  Requires-Dist: langchain-community==0.3.24
22
23
  Requires-Dist: langchain-core==0.3.59
@@ -48,6 +49,8 @@ Requires-Dist: unstructured-ingest[sharepoint]
48
49
  Requires-Dist: unstructured-ingest[slack]
49
50
  Requires-Dist: html5lib==1.1
50
51
  Requires-Dist: markdownify==0.14.1
52
+ Requires-Dist: duckduckgo-search==8.0.4
53
+ Requires-Dist: langchain_google_community==2.0.7
51
54
  Dynamic: author
52
55
  Dynamic: author-email
53
56
  Dynamic: classifier
@@ -26,10 +26,10 @@ ws_bom_robot_app/llm/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5
26
26
  ws_bom_robot_app/llm/providers/llm_manager.py,sha256=zIkxgTLYQCcup2Ixf4eWap4mNinuJH2YmkjLjZGDyJM,8371
27
27
  ws_bom_robot_app/llm/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
28
28
  ws_bom_robot_app/llm/tools/tool_builder.py,sha256=p_Q32_-OSydcxzj69PgPIuiny816zYv5dVsCHSY0ELc,1188
29
- ws_bom_robot_app/llm/tools/tool_manager.py,sha256=4Zu4_CjVg-PsrAjb3MHUtwrcGyZw1NZuO02zSH4BSHg,6950
30
- ws_bom_robot_app/llm/tools/utils.py,sha256=LEId1UolLvJsMYbl-awm7h-gJ9Up9DrrnH6HTglGqVE,1347
29
+ ws_bom_robot_app/llm/tools/tool_manager.py,sha256=J8WQnjXpLe3Sv6thUi_kgXIDBmt1Z4C-ASzs4RHsVNg,10253
30
+ ws_bom_robot_app/llm/tools/utils.py,sha256=Ba7ScFZPVJ3ke8KLO8ik1wyR2f_zC99Bikqx0OGnKoI,1924
31
31
  ws_bom_robot_app/llm/tools/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
32
- ws_bom_robot_app/llm/tools/models/main.py,sha256=My-b-2Cr4sRORgHrJVVjUaG59c2y52cvzx3hHUhTPMM,481
32
+ ws_bom_robot_app/llm/tools/models/main.py,sha256=pBQNWPd1OZgZ2xkOnUOawNbujQ5oJXLdyuAex1afLWc,579
33
33
  ws_bom_robot_app/llm/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
34
34
  ws_bom_robot_app/llm/utils/agent.py,sha256=ISF9faaD5tBi-8sbgQpgfqWT1JIVcgv_lRhyaNAkI2Q,1445
35
35
  ws_bom_robot_app/llm/utils/chunker.py,sha256=N7570xBYlObneg-fsvDhPAJ-Pv8C8OaYZOBK6q7LmMI,607
@@ -65,7 +65,7 @@ ws_bom_robot_app/llm/vector_store/loader/__init__.py,sha256=47DEQpj8HBSa-_TImW-5
65
65
  ws_bom_robot_app/llm/vector_store/loader/base.py,sha256=L_ugekNuAq0N9O-24wtlHSNHkqSeD-KsJrfGt_FX9Oc,5340
66
66
  ws_bom_robot_app/llm/vector_store/loader/docling.py,sha256=yP0zgXLeFAlByaYuj-6cYariuknckrFds0dxdRcnVz8,3456
67
67
  ws_bom_robot_app/llm/vector_store/loader/json_loader.py,sha256=LDppW0ZATo4_1hh-KlsAM3TLawBvwBxva_a7k5Oz1sc,858
68
- ws_bom_robot_app-0.0.58.dist-info/METADATA,sha256=IjxifGGFeeCc_HL_6Z3GHup-x1BNNao9IiTZqjcuU-w,8330
69
- ws_bom_robot_app-0.0.58.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
70
- ws_bom_robot_app-0.0.58.dist-info/top_level.txt,sha256=Yl0akyHVbynsBX_N7wx3H3ZTkcMLjYyLJs5zBMDAKcM,17
71
- ws_bom_robot_app-0.0.58.dist-info/RECORD,,
68
+ ws_bom_robot_app-0.0.60.dist-info/METADATA,sha256=qOMWEMQar7dYSZllRX9OOj_F198nzb4Xmd6Ur3MR1FE,8456
69
+ ws_bom_robot_app-0.0.60.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
70
+ ws_bom_robot_app-0.0.60.dist-info/top_level.txt,sha256=Yl0akyHVbynsBX_N7wx3H3ZTkcMLjYyLJs5zBMDAKcM,17
71
+ ws_bom_robot_app-0.0.60.dist-info/RECORD,,