ws-bom-robot-app 0.0.57__tar.gz → 0.0.59__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. {ws_bom_robot_app-0.0.57/ws_bom_robot_app.egg-info → ws_bom_robot_app-0.0.59}/PKG-INFO +9 -7
  2. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/requirements.txt +8 -6
  3. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/setup.py +1 -1
  4. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/config.py +3 -0
  5. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/agent_handler.py +1 -2
  6. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/providers/llm_manager.py +1 -1
  7. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/tools/models/main.py +4 -0
  8. ws_bom_robot_app-0.0.59/ws_bom_robot_app/llm/tools/tool_manager.py +188 -0
  9. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/tools/utils.py +16 -0
  10. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59/ws_bom_robot_app.egg-info}/PKG-INFO +9 -7
  11. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app.egg-info/requires.txt +8 -6
  12. ws_bom_robot_app-0.0.57/ws_bom_robot_app/llm/tools/tool_manager.py +0 -134
  13. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/MANIFEST.in +0 -0
  14. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/README.md +0 -0
  15. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/pyproject.toml +0 -0
  16. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/setup.cfg +0 -0
  17. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/__init__.py +0 -0
  18. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/auth.py +0 -0
  19. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/cron_manager.py +0 -0
  20. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/__init__.py +0 -0
  21. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/agent_context.py +0 -0
  22. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/agent_description.py +0 -0
  23. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/agent_lcel.py +0 -0
  24. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/api.py +0 -0
  25. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/defaut_prompt.py +0 -0
  26. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/feedbacks/__init__.py +0 -0
  27. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/feedbacks/feedback_manager.py +0 -0
  28. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/main.py +0 -0
  29. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/models/__init__.py +0 -0
  30. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/models/api.py +0 -0
  31. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/models/base.py +0 -0
  32. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/models/feedback.py +0 -0
  33. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/models/kb.py +0 -0
  34. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/nebuly_handler.py +0 -0
  35. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/providers/__init__.py +0 -0
  36. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/settings.py +0 -0
  37. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/tools/__init__.py +0 -0
  38. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/tools/models/__init__.py +0 -0
  39. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/tools/tool_builder.py +0 -0
  40. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/utils/__init__.py +0 -0
  41. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/utils/agent.py +0 -0
  42. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/utils/chunker.py +0 -0
  43. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/utils/download.py +0 -0
  44. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/utils/kb.py +0 -0
  45. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/utils/print.py +0 -0
  46. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/utils/secrets.py +0 -0
  47. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/utils/webhooks.py +0 -0
  48. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/vector_store/__init__.py +0 -0
  49. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/vector_store/db/__init__.py +0 -0
  50. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/vector_store/db/base.py +0 -0
  51. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/vector_store/db/chroma.py +0 -0
  52. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/vector_store/db/faiss.py +0 -0
  53. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/vector_store/db/manager.py +0 -0
  54. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/vector_store/db/qdrant.py +0 -0
  55. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/vector_store/generator.py +0 -0
  56. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/vector_store/integration/__init__.py +0 -0
  57. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/vector_store/integration/azure.py +0 -0
  58. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/vector_store/integration/base.py +0 -0
  59. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/vector_store/integration/confluence.py +0 -0
  60. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/vector_store/integration/dropbox.py +0 -0
  61. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/vector_store/integration/gcs.py +0 -0
  62. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/vector_store/integration/github.py +0 -0
  63. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/vector_store/integration/googledrive.py +0 -0
  64. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/vector_store/integration/jira.py +0 -0
  65. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/vector_store/integration/manager.py +0 -0
  66. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/vector_store/integration/s3.py +0 -0
  67. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/vector_store/integration/sftp.py +0 -0
  68. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/vector_store/integration/sharepoint.py +0 -0
  69. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/vector_store/integration/sitemap.py +0 -0
  70. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/vector_store/integration/slack.py +0 -0
  71. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/vector_store/loader/__init__.py +0 -0
  72. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/vector_store/loader/base.py +0 -0
  73. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/vector_store/loader/docling.py +0 -0
  74. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/llm/vector_store/loader/json_loader.py +0 -0
  75. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/main.py +0 -0
  76. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/task_manager.py +0 -0
  77. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app/util.py +0 -0
  78. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app.egg-info/SOURCES.txt +0 -0
  79. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app.egg-info/dependency_links.txt +0 -0
  80. {ws_bom_robot_app-0.0.57 → ws_bom_robot_app-0.0.59}/ws_bom_robot_app.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ws_bom_robot_app
3
- Version: 0.0.57
3
+ Version: 0.0.59
4
4
  Summary: A FastAPI application serving ws bom/robot/llm platform ai.
5
5
  Home-page: https://github.com/websolutespa/bom
6
6
  Author: Websolute Spa
@@ -17,15 +17,16 @@ Requires-Dist: pydantic==2.10.6
17
17
  Requires-Dist: pydantic-settings==2.7.1
18
18
  Requires-Dist: fastapi[standard]==0.115.8
19
19
  Requires-Dist: chevron==0.14.0
20
- Requires-Dist: langchain==0.3.18
21
- Requires-Dist: langchain-community==0.3.17
22
- Requires-Dist: langchain-core==0.3.34
23
- Requires-Dist: langchain-openai==0.3.5
20
+ Requires-Dist: trafilatura==2.0.0
21
+ Requires-Dist: langchain==0.3.25
22
+ Requires-Dist: langchain-community==0.3.24
23
+ Requires-Dist: langchain-core==0.3.59
24
+ Requires-Dist: langchain-openai==0.3.16
24
25
  Requires-Dist: langchain-anthropic==0.3.6
25
26
  Requires-Dist: langchain-google-genai==2.0.7
26
27
  Requires-Dist: langchain-google-vertexai==2.0.13
27
- Requires-Dist: langchain-groq==0.2.4
28
- Requires-Dist: langchain-ollama==0.2.3
28
+ Requires-Dist: langchain-groq==0.3.2
29
+ Requires-Dist: langchain-ollama==0.3.2
29
30
  Requires-Dist: faiss-cpu==1.9.0
30
31
  Requires-Dist: chromadb==0.6.3
31
32
  Requires-Dist: langchain_chroma==0.2.1
@@ -48,6 +49,7 @@ Requires-Dist: unstructured-ingest[sharepoint]
48
49
  Requires-Dist: unstructured-ingest[slack]
49
50
  Requires-Dist: html5lib==1.1
50
51
  Requires-Dist: markdownify==0.14.1
52
+ Requires-Dist: duckduckgo-search==8.0.4
51
53
  Dynamic: author
52
54
  Dynamic: author-email
53
55
  Dynamic: classifier
@@ -6,17 +6,18 @@ pydantic==2.10.6
6
6
  pydantic-settings==2.7.1
7
7
  fastapi[standard]==0.115.8
8
8
  chevron==0.14.0
9
+ trafilatura==2.0.0
9
10
 
10
11
  #framework
11
- langchain==0.3.18
12
- langchain-community==0.3.17
13
- langchain-core==0.3.34
14
- langchain-openai==0.3.5
12
+ langchain==0.3.25
13
+ langchain-community==0.3.24
14
+ langchain-core==0.3.59
15
+ langchain-openai==0.3.16
15
16
  langchain-anthropic==0.3.6 #issue get_models() from 0.3.7
16
17
  langchain-google-genai==2.0.7 #waiting for new release: https://github.com/langchain-ai/langchain-google/issues/711
17
18
  langchain-google-vertexai==2.0.13
18
- langchain-groq==0.2.4
19
- langchain-ollama==0.2.3
19
+ langchain-groq==0.3.2
20
+ langchain-ollama==0.3.2
20
21
 
21
22
  #vector DB
22
23
  faiss-cpu==1.9.0
@@ -46,3 +47,4 @@ html5lib==1.1 #beautifulsoup4 parser
46
47
 
47
48
  #integrations
48
49
  markdownify==0.14.1 #sitemap
50
+ duckduckgo-search==8.0.4
@@ -4,7 +4,7 @@ _requirements = [line.split('#')[0].strip() for line in open("requirements.txt")
4
4
 
5
5
  setup(
6
6
  name="ws_bom_robot_app",
7
- version="0.0.57",
7
+ version="0.0.59",
8
8
  description="A FastAPI application serving ws bom/robot/llm platform ai.",
9
9
  long_description=open("README.md", encoding='utf-8').read(),
10
10
  long_description_content_type="text/markdown",
@@ -28,6 +28,7 @@ class Settings(BaseSettings):
28
28
  GOOGLE_API_KEY: str = ''
29
29
  NEBULY_API_URL: str =''
30
30
  GOOGLE_APPLICATION_CREDENTIALS: str = '' # path to google credentials iam file, e.d. ./.secrets/google-credentials.json
31
+ TAVILY_API_KEY: str = '' #TODO DELETE
31
32
  model_config = ConfigDict(
32
33
  env_file='./.env',
33
34
  extra='ignore',
@@ -43,6 +44,8 @@ class Settings(BaseSettings):
43
44
  os.environ["GOOGLE_API_KEY"] = self.GOOGLE_API_KEY
44
45
  os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = self.GOOGLE_APPLICATION_CREDENTIALS
45
46
  os.environ["NEBULY_API_URL"] = self.NEBULY_API_URL
47
+ # TODO DELETE
48
+ os.environ["TAVILY_API_KEY"] = self.TAVILY_API_KEY
46
49
 
47
50
  class RuntimeOptions(BaseModel):
48
51
  @staticmethod
@@ -41,7 +41,6 @@ class AgentHandler(AsyncCallbackHandler):
41
41
  self.json_start_regex = re.compile(r'(`{1,3}\s*json\b)') # detect a potential json start fence.
42
42
  self.json_end_regex = re.compile(r'(`{1,3})') # an end fence (one to three backticks).
43
43
  self.stream_cut_last_output_chunk_size = 16 # safe cut last chunk size to output if no markers are found
44
-
45
44
  async def on_chat_model_start(self, serialized, messages, *, run_id, parent_run_id = None, tags = None, metadata = None, **kwargs):
46
45
  if not self.__started:
47
46
  self.__started = True
@@ -102,7 +101,7 @@ class AgentHandler(AsyncCallbackHandler):
102
101
  finally:
103
102
  self.json_buffer = ""
104
103
  # remove the end fence from pending.
105
- self.stream_buffer = self.stream_buffer[end_match.end():].replace('`','').strip()
104
+ self.stream_buffer = self.stream_buffer[end_match.end():].strip()
106
105
  self.in_json_block = False
107
106
  else:
108
107
  # no end marker found
@@ -48,7 +48,7 @@ class OpenAI(LlmInterface):
48
48
  api_key=self.config.api_key or os.getenv("OPENAI_API_KEY"),
49
49
  model=self.config.model,
50
50
  stream_usage=True)
51
- if not any(self.config.model.startswith(prefix) for prefix in ["o1", "o3"]):
51
+ if not (any(self.config.model.startswith(prefix) for prefix in ["o1", "o3"]) or "search" in self.config.model):
52
52
  chat.temperature = self.config.temperature
53
53
  chat.streaming = True
54
54
  return chat
@@ -7,3 +7,7 @@ class DocumentRetrieverInput(BaseModel):
7
7
  class ImageGeneratorInput(BaseModel):
8
8
  query: str = Field(description="description of the image to generate.")
9
9
  language: str = Field(description="Language of the query. Default is 'it'", default="it")
10
+ class LlmChainInput(BaseModel):
11
+ input: str = Field(description="Input to the LLM chain")
12
+ class SearchOnlineInput(BaseModel):
13
+ query: str = Field(description="The search query string")
@@ -0,0 +1,188 @@
1
+ from asyncio import Queue
2
+ from typing import Optional, Type, Callable
3
+ from ws_bom_robot_app.llm.models.api import LlmAppTool
4
+ from ws_bom_robot_app.llm.providers.llm_manager import LlmInterface
5
+ from ws_bom_robot_app.llm.vector_store.db.manager import VectorDbManager
6
+ from ws_bom_robot_app.llm.tools.utils import getRandomWaitingMessage, translate_text
7
+ from ws_bom_robot_app.llm.tools.models.main import NoopInput,DocumentRetrieverInput,ImageGeneratorInput,LlmChainInput,SearchOnlineInput
8
+ from pydantic import BaseModel, ConfigDict
9
+
10
+ class ToolConfig(BaseModel):
11
+ function: Callable
12
+ model: Optional[Type[BaseModel]] = NoopInput
13
+ model_config = ConfigDict(
14
+ arbitrary_types_allowed=True
15
+ )
16
+
17
+ class ToolManager:
18
+ """
19
+ ToolManager is responsible for managing various tools used in the application.
20
+
21
+ Attributes:
22
+ app_tool (LlmAppTool): The application tool configuration.
23
+ api_key (str): The API key for accessing external services.
24
+ callbacks (list): A list of callback functions to be executed.
25
+
26
+ Methods:
27
+ document_retriever(query: str): Asynchronously retrieves documents based on the query.
28
+ image_generator(query: str, language: str = "it"): Asynchronously generates an image based on the query.
29
+ get_coroutine(): Retrieves the coroutine function based on the tool configuration.
30
+ """
31
+
32
+ def __init__(
33
+ self,
34
+ llm: LlmInterface,
35
+ app_tool: LlmAppTool,
36
+ callbacks: list,
37
+ queue: Optional[Queue] = None
38
+ ):
39
+ self.llm = llm
40
+ self.app_tool = app_tool
41
+ self.callbacks = callbacks
42
+ self.queue = queue
43
+
44
+ async def __extract_documents(self, query: str, app_tool: LlmAppTool):
45
+ search_type = "similarity"
46
+ search_kwargs = {"k": 4}
47
+ if app_tool.search_settings:
48
+ search_settings = app_tool.search_settings # type: ignore
49
+ if search_settings.search_type == "similarityScoreThreshold":
50
+ search_type = "similarity_score_threshold"
51
+ search_kwargs = {
52
+ "score_threshold": search_settings.score_threshold_id if search_settings.score_threshold_id else 0.5,
53
+ "k": search_settings.search_k if search_settings.search_k else 100
54
+ }
55
+ elif search_settings.search_type == "mmr":
56
+ search_type = "mmr"
57
+ search_kwargs = {"k": search_settings.search_k if search_settings.search_k else 4}
58
+ elif search_settings.search_type == "default":
59
+ search_type = "similarity"
60
+ search_kwargs = {"k": search_settings.search_k if search_settings.search_k else 4}
61
+ else:
62
+ search_type = "mixed"
63
+ search_kwargs = {"k": search_settings.search_k if search_settings.search_k else 4}
64
+ if self.queue:
65
+ await self.queue.put(getRandomWaitingMessage(app_tool.waiting_message, traduction=False))
66
+
67
+ return await VectorDbManager.get_strategy(app_tool.vector_type).invoke(
68
+ self.llm.get_embeddings(),
69
+ app_tool.vector_db,
70
+ query,
71
+ search_type,
72
+ search_kwargs,
73
+ app_tool=app_tool,
74
+ llm=self.llm.get_llm(),
75
+ source=app_tool.function_id,
76
+ )
77
+
78
+ #region functions
79
+ async def document_retriever(self, query: str) -> list:
80
+ """
81
+ Asynchronously retrieves documents based on the provided query using the specified search settings.
82
+
83
+ Args:
84
+ query (str): The search query string.
85
+
86
+ Returns:
87
+ list: A list of retrieved documents based on the search criteria.
88
+
89
+ Raises:
90
+ ValueError: If the configuration for the tool is invalid or the vector database is not found.
91
+
92
+ Notes:
93
+ - The function supports different search types such as "similarity", "similarity_score_threshold", "mmr", and "mixed".
94
+ - The search settings can be customized through the `app_tool.search_settings` attribute.
95
+ - If a queue is provided, a waiting message is put into the queue before invoking the search.
96
+ """
97
+ if (
98
+ self.app_tool.type == "function" and self.app_tool.vector_db
99
+ #and self.settings.get("dataSource") == "knowledgebase"
100
+ ):
101
+ return await self.__extract_documents(query, self.app_tool)
102
+
103
+ async def image_generator(self, query: str, language: str = "it"):
104
+ """
105
+ Asynchronously generates an image based on the query.
106
+ set OPENAI_API_KEY in your environment variables
107
+ """
108
+ from langchain_community.utilities.dalle_image_generator import DallEAPIWrapper
109
+ model = self.app_tool.model or "dall-e-3"
110
+ random_waiting_message = getRandomWaitingMessage(self.app_tool.waiting_message, traduction=False)
111
+ if not language:
112
+ language = "it"
113
+ await translate_text(
114
+ self.llm, language, random_waiting_message, self.callbacks
115
+ )
116
+ try:
117
+ #set os.environ.get("OPENAI_API_KEY")!
118
+ image_url = DallEAPIWrapper(model=model).run(query) # type: ignore
119
+ return image_url
120
+ except Exception as e:
121
+ return f"Error: {str(e)}"
122
+
123
+ async def llm_chain(self, input: str):
124
+ if self.app_tool.type == "llmChain":
125
+ from langchain_core.prompts import ChatPromptTemplate
126
+ from langchain_core.output_parsers import StrOutputParser
127
+ system_message = self.app_tool.llm_chain_settings.prompt
128
+ context = []
129
+ if self.app_tool.data_source == "knowledgebase":
130
+ context = await self.__extract_documents(input, self.app_tool)
131
+ if len(context) > 0:
132
+ for doc in context:
133
+ system_message += f"\n\nContext:\n{doc.metadata.get("source", "")}: {doc.page_content}"
134
+ prompt = ChatPromptTemplate.from_messages(
135
+ [ ("system", system_message),
136
+ ("user", "{input}")],
137
+ )
138
+ model = self.app_tool.llm_chain_settings.model
139
+ self.llm.config.model = model
140
+ llm = self.llm.get_llm()
141
+ chain = prompt | llm | StrOutputParser()
142
+ result = await chain.ainvoke({"input": input})
143
+ return result
144
+
145
+
146
+ async def search_online(self, query: str):
147
+ from ws_bom_robot_app.llm.tools.utils import fetch_page, extract_content_with_trafilatura
148
+ from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
149
+ import aiohttp, asyncio, ast
150
+ # Wrapper DuckDuckGo
151
+ search = DuckDuckGoSearchAPIWrapper(max_results=10)
152
+ try:
153
+ raw_results = search.results(query, max_results=10)
154
+ except Exception as e:
155
+ print(f"[!] Errore ricerca: {e}")
156
+ urls = [r["link"] for r in raw_results]
157
+ async with aiohttp.ClientSession() as session:
158
+ tasks = [fetch_page(session, url) for url in urls]
159
+ responses = await asyncio.gather(*tasks)
160
+ final_results = []
161
+ for item in responses:
162
+ url = item["url"]
163
+ html = item["html"]
164
+ if html:
165
+ content = await extract_content_with_trafilatura(html)
166
+ if content:
167
+ final_results.append({"url": url, "content": content})
168
+ else:
169
+ final_results.append({"url": url, "content": "No content found"})
170
+ else:
171
+ final_results.append({"url": url, "content": "Page not found"})
172
+ return final_results
173
+
174
+
175
+ #endregion
176
+
177
+ #class variables (static)
178
+ _list: dict[str,ToolConfig] = {
179
+ "document_retriever": ToolConfig(function=document_retriever, model=DocumentRetrieverInput),
180
+ "image_generator": ToolConfig(function=image_generator, model=ImageGeneratorInput),
181
+ "llm_chain": ToolConfig(function=llm_chain, model=LlmChainInput),
182
+ "search_online": ToolConfig(function=search_online, model=SearchOnlineInput),
183
+ }
184
+
185
+ #instance methods
186
+ def get_coroutine(self):
187
+ tool_cfg = self._list.get(self.app_tool.function_name)
188
+ return getattr(self, tool_cfg.function.__name__) # type: ignore
@@ -23,3 +23,19 @@ async def translate_text(llm: LlmInterface, language, text: str, callbacks: list
23
23
  prompt = PromptTemplate.from_template(sys_message)
24
24
  chain = prompt | llm.get_llm()
25
25
  await chain.ainvoke({"language":language, "testo_da_tradurre": text}, {"callbacks": callbacks})
26
+
27
+ async def fetch_page(session, url):
28
+ try:
29
+ async with session.get(url, timeout=10, ssl=False) as response:
30
+ if response.status == 200:
31
+ text = await response.text()
32
+ return {"url": url, "html": text}
33
+ else:
34
+ return {"url": url, "html": None}
35
+ except Exception as e:
36
+ return {"url": url, "html": None}
37
+
38
+ async def extract_content_with_trafilatura(html):
39
+ """Estrae solo il testo principale usando trafilatura"""
40
+ import trafilatura
41
+ return trafilatura.extract(html)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ws_bom_robot_app
3
- Version: 0.0.57
3
+ Version: 0.0.59
4
4
  Summary: A FastAPI application serving ws bom/robot/llm platform ai.
5
5
  Home-page: https://github.com/websolutespa/bom
6
6
  Author: Websolute Spa
@@ -17,15 +17,16 @@ Requires-Dist: pydantic==2.10.6
17
17
  Requires-Dist: pydantic-settings==2.7.1
18
18
  Requires-Dist: fastapi[standard]==0.115.8
19
19
  Requires-Dist: chevron==0.14.0
20
- Requires-Dist: langchain==0.3.18
21
- Requires-Dist: langchain-community==0.3.17
22
- Requires-Dist: langchain-core==0.3.34
23
- Requires-Dist: langchain-openai==0.3.5
20
+ Requires-Dist: trafilatura==2.0.0
21
+ Requires-Dist: langchain==0.3.25
22
+ Requires-Dist: langchain-community==0.3.24
23
+ Requires-Dist: langchain-core==0.3.59
24
+ Requires-Dist: langchain-openai==0.3.16
24
25
  Requires-Dist: langchain-anthropic==0.3.6
25
26
  Requires-Dist: langchain-google-genai==2.0.7
26
27
  Requires-Dist: langchain-google-vertexai==2.0.13
27
- Requires-Dist: langchain-groq==0.2.4
28
- Requires-Dist: langchain-ollama==0.2.3
28
+ Requires-Dist: langchain-groq==0.3.2
29
+ Requires-Dist: langchain-ollama==0.3.2
29
30
  Requires-Dist: faiss-cpu==1.9.0
30
31
  Requires-Dist: chromadb==0.6.3
31
32
  Requires-Dist: langchain_chroma==0.2.1
@@ -48,6 +49,7 @@ Requires-Dist: unstructured-ingest[sharepoint]
48
49
  Requires-Dist: unstructured-ingest[slack]
49
50
  Requires-Dist: html5lib==1.1
50
51
  Requires-Dist: markdownify==0.14.1
52
+ Requires-Dist: duckduckgo-search==8.0.4
51
53
  Dynamic: author
52
54
  Dynamic: author-email
53
55
  Dynamic: classifier
@@ -5,15 +5,16 @@ pydantic==2.10.6
5
5
  pydantic-settings==2.7.1
6
6
  fastapi[standard]==0.115.8
7
7
  chevron==0.14.0
8
- langchain==0.3.18
9
- langchain-community==0.3.17
10
- langchain-core==0.3.34
11
- langchain-openai==0.3.5
8
+ trafilatura==2.0.0
9
+ langchain==0.3.25
10
+ langchain-community==0.3.24
11
+ langchain-core==0.3.59
12
+ langchain-openai==0.3.16
12
13
  langchain-anthropic==0.3.6
13
14
  langchain-google-genai==2.0.7
14
15
  langchain-google-vertexai==2.0.13
15
- langchain-groq==0.2.4
16
- langchain-ollama==0.2.3
16
+ langchain-groq==0.3.2
17
+ langchain-ollama==0.3.2
17
18
  faiss-cpu==1.9.0
18
19
  chromadb==0.6.3
19
20
  langchain_chroma==0.2.1
@@ -36,3 +37,4 @@ unstructured-ingest[sharepoint]
36
37
  unstructured-ingest[slack]
37
38
  html5lib==1.1
38
39
  markdownify==0.14.1
40
+ duckduckgo-search==8.0.4
@@ -1,134 +0,0 @@
1
- from asyncio import Queue
2
- from typing import Optional, Type, Callable
3
- from ws_bom_robot_app.llm.models.api import LlmAppTool
4
- from ws_bom_robot_app.llm.providers.llm_manager import LlmInterface
5
- from ws_bom_robot_app.llm.vector_store.db.manager import VectorDbManager
6
- from ws_bom_robot_app.llm.tools.utils import getRandomWaitingMessage, translate_text
7
- from ws_bom_robot_app.llm.tools.models.main import NoopInput,DocumentRetrieverInput,ImageGeneratorInput
8
- from pydantic import BaseModel, ConfigDict
9
-
10
- class ToolConfig(BaseModel):
11
- function: Callable
12
- model: Optional[Type[BaseModel]] = NoopInput
13
- model_config = ConfigDict(
14
- arbitrary_types_allowed=True
15
- )
16
-
17
- class ToolManager:
18
- """
19
- ToolManager is responsible for managing various tools used in the application.
20
-
21
- Attributes:
22
- app_tool (LlmAppTool): The application tool configuration.
23
- api_key (str): The API key for accessing external services.
24
- callbacks (list): A list of callback functions to be executed.
25
-
26
- Methods:
27
- document_retriever(query: str): Asynchronously retrieves documents based on the query.
28
- image_generator(query: str, language: str = "it"): Asynchronously generates an image based on the query.
29
- get_coroutine(): Retrieves the coroutine function based on the tool configuration.
30
- """
31
-
32
- def __init__(
33
- self,
34
- llm: LlmInterface,
35
- app_tool: LlmAppTool,
36
- callbacks: list,
37
- queue: Optional[Queue] = None
38
- ):
39
- self.llm = llm
40
- self.app_tool = app_tool
41
- self.callbacks = callbacks
42
- self.queue = queue
43
-
44
-
45
- #region functions
46
- async def document_retriever(self, query: str) -> list:
47
- """
48
- Asynchronously retrieves documents based on the provided query using the specified search settings.
49
-
50
- Args:
51
- query (str): The search query string.
52
-
53
- Returns:
54
- list: A list of retrieved documents based on the search criteria.
55
-
56
- Raises:
57
- ValueError: If the configuration for the tool is invalid or the vector database is not found.
58
-
59
- Notes:
60
- - The function supports different search types such as "similarity", "similarity_score_threshold", "mmr", and "mixed".
61
- - The search settings can be customized through the `app_tool.search_settings` attribute.
62
- - If a queue is provided, a waiting message is put into the queue before invoking the search.
63
- """
64
- if (
65
- self.app_tool.type == "function" and self.app_tool.vector_db
66
- #and self.settings.get("dataSource") == "knowledgebase"
67
- ):
68
- search_type = "similarity"
69
- search_kwargs = {"k": 4}
70
- if self.app_tool.search_settings:
71
- search_settings = self.app_tool.search_settings # type: ignore
72
- if search_settings.search_type == "similarityScoreThreshold":
73
- search_type = "similarity_score_threshold"
74
- search_kwargs = {
75
- "score_threshold": search_settings.score_threshold_id if search_settings.score_threshold_id else 0.5,
76
- "k": search_settings.search_k if search_settings.search_k else 100
77
- }
78
- elif search_settings.search_type == "mmr":
79
- search_type = "mmr"
80
- search_kwargs = {"k": search_settings.search_k if search_settings.search_k else 4}
81
- elif search_settings.search_type == "default":
82
- search_type = "similarity"
83
- search_kwargs = {"k": search_settings.search_k if search_settings.search_k else 4}
84
- else:
85
- search_type = "mixed"
86
- search_kwargs = {"k": search_settings.search_k if search_settings.search_k else 4}
87
- if self.queue:
88
- await self.queue.put(getRandomWaitingMessage(self.app_tool.waiting_message, traduction=False))
89
-
90
- return await VectorDbManager.get_strategy(self.app_tool.vector_type).invoke(
91
- self.llm.get_embeddings(),
92
- self.app_tool.vector_db,
93
- query,
94
- search_type,
95
- search_kwargs,
96
- app_tool=self.app_tool,
97
- llm=self.llm.get_llm(),
98
- source=self.app_tool.function_id,
99
- )
100
- return []
101
- #raise ValueError(f"Invalid configuration for {self.settings.name} tool of type {self.settings.type}. Must be a function or vector db not found.")
102
-
103
- async def image_generator(self, query: str, language: str = "it"):
104
- """
105
- Asynchronously generates an image based on the query.
106
- set OPENAI_API_KEY in your environment variables
107
- """
108
- from langchain_community.utilities.dalle_image_generator import DallEAPIWrapper
109
- model = self.app_tool.model or "dall-e-3"
110
- random_waiting_message = getRandomWaitingMessage(self.app_tool.waiting_message, traduction=False)
111
- if not language:
112
- language = "it"
113
- await translate_text(
114
- self.llm, language, random_waiting_message, self.callbacks
115
- )
116
- try:
117
- #set os.environ.get("OPENAI_API_KEY")!
118
- image_url = DallEAPIWrapper(model=model).run(query) # type: ignore
119
- return image_url
120
- except Exception as e:
121
- return f"Error: {str(e)}"
122
-
123
- #endregion
124
-
125
- #class variables (static)
126
- _list: dict[str,ToolConfig] = {
127
- "document_retriever": ToolConfig(function=document_retriever, model=DocumentRetrieverInput),
128
- "image_generator": ToolConfig(function=image_generator, model=ImageGeneratorInput),
129
- }
130
-
131
- #instance methods
132
- def get_coroutine(self):
133
- tool_cfg = self._list.get(self.app_tool.function_name)
134
- return getattr(self, tool_cfg.function.__name__) # type: ignore