ws-bom-robot-app 0.0.29__tar.gz → 0.0.31__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. ws_bom_robot_app-0.0.31/MANIFEST.in +2 -0
  2. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/PKG-INFO +12 -15
  3. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/README.md +1 -4
  4. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/pyproject.toml +1 -1
  5. {ws_bom_robot_app-0.0.29/ws_bom_robot_app → ws_bom_robot_app-0.0.31}/requirements.txt +10 -10
  6. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/setup.py +4 -3
  7. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/llm/agent_description.py +124 -124
  8. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/llm/agent_handler.py +167 -167
  9. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/llm/agent_lcel.py +64 -64
  10. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/llm/defaut_prompt.py +9 -9
  11. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/llm/main.py +102 -102
  12. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/llm/settings.py +4 -4
  13. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/llm/tools/tool_builder.py +19 -19
  14. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/llm/tools/tool_manager.py +101 -101
  15. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/llm/tools/utils.py +25 -25
  16. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/llm/utils/agent_utils.py +16 -16
  17. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/llm/utils/download.py +79 -79
  18. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/llm/utils/print.py +29 -29
  19. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/llm/vector_store/generator.py +137 -137
  20. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/llm/vector_store/integration/sitemap.py +12 -3
  21. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/llm/vector_store/loader/base.py +2 -2
  22. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/llm/vector_store/loader/docling.py +4 -2
  23. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/llm/vector_store/loader/json_loader.py +25 -25
  24. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app.egg-info/PKG-INFO +12 -15
  25. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app.egg-info/SOURCES.txt +2 -1
  26. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app.egg-info/requires.txt +10 -10
  27. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/setup.cfg +0 -0
  28. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/__init__.py +0 -0
  29. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/auth.py +0 -0
  30. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/config.py +0 -0
  31. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/cron_manager.py +0 -0
  32. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/llm/__init__.py +0 -0
  33. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/llm/api.py +0 -0
  34. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/llm/models/__init__.py +0 -0
  35. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/llm/models/api.py +0 -0
  36. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/llm/models/base.py +0 -0
  37. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/llm/models/kb.py +0 -0
  38. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/llm/tools/__init__.py +0 -0
  39. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/llm/tools/models/__init__.py +0 -0
  40. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/llm/tools/models/main.py +0 -0
  41. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/llm/utils/__init__.py +0 -0
  42. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/llm/utils/chunker.py +0 -0
  43. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/llm/utils/faiss_helper.py +0 -0
  44. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/llm/utils/kb.py +0 -0
  45. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/llm/utils/webhooks.py +0 -0
  46. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/llm/vector_store/__init__.py +0 -0
  47. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/llm/vector_store/integration/__init__.py +0 -0
  48. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/llm/vector_store/integration/azure.py +0 -0
  49. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/llm/vector_store/integration/base.py +0 -0
  50. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/llm/vector_store/integration/confluence.py +0 -0
  51. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/llm/vector_store/integration/dropbox.py +0 -0
  52. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/llm/vector_store/integration/gcs.py +0 -0
  53. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/llm/vector_store/integration/github.py +0 -0
  54. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/llm/vector_store/integration/googledrive.py +0 -0
  55. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/llm/vector_store/integration/jira.py +0 -0
  56. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/llm/vector_store/integration/manager.py +0 -0
  57. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/llm/vector_store/integration/s3.py +0 -0
  58. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/llm/vector_store/integration/sftp.py +0 -0
  59. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/llm/vector_store/integration/sharepoint.py +0 -0
  60. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/llm/vector_store/integration/slack.py +0 -0
  61. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/llm/vector_store/loader/__init__.py +0 -0
  62. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/main.py +0 -0
  63. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/task_manager.py +0 -0
  64. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app/util.py +0 -0
  65. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app.egg-info/dependency_links.txt +0 -0
  66. {ws_bom_robot_app-0.0.29 → ws_bom_robot_app-0.0.31}/ws_bom_robot_app.egg-info/top_level.txt +0 -0
@@ -0,0 +1,2 @@
1
+ include requirements.txt
2
+ include README.md
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: ws_bom_robot_app
3
- Version: 0.0.29
3
+ Version: 0.0.31
4
4
  Summary: A FastAPI application serving ws bom/robot/llm platform ai.
5
5
  Home-page: https://github.com/websolutespa/bom
6
6
  Author: Websolute Spa
@@ -13,17 +13,17 @@ Description-Content-Type: text/markdown
13
13
  Requires-Dist: standardwebhooks==1.0.0
14
14
  Requires-Dist: apscheduler==3.11.0
15
15
  Requires-Dist: aiofiles==24.1.0
16
- Requires-Dist: pydantic==2.10.3
17
- Requires-Dist: pydantic-settings==2.6.1
18
- Requires-Dist: fastapi[standard]==0.115.5
19
- Requires-Dist: langchain==0.3.9
20
- Requires-Dist: langchain-openai==0.2.10
21
- Requires-Dist: langchain-community==0.3.8
22
- Requires-Dist: langchain-core==0.3.21
16
+ Requires-Dist: pydantic==2.10.5
17
+ Requires-Dist: pydantic-settings==2.7.1
18
+ Requires-Dist: fastapi[standard]==0.115.6
19
+ Requires-Dist: langchain==0.3.14
20
+ Requires-Dist: langchain-openai==0.3.0
21
+ Requires-Dist: langchain-community==0.3.14
22
+ Requires-Dist: langchain-core==0.3.29
23
23
  Requires-Dist: faiss-cpu==1.9.0
24
- Requires-Dist: unstructured==0.16.11
24
+ Requires-Dist: unstructured==0.16.13
25
25
  Requires-Dist: unstructured[image]
26
- Requires-Dist: unstructured-ingest==0.3.8
26
+ Requires-Dist: unstructured-ingest==0.3.14
27
27
  Requires-Dist: unstructured-ingest[azure]
28
28
  Requires-Dist: unstructured-ingest[confluence]
29
29
  Requires-Dist: unstructured-ingest[dropbox]
@@ -37,7 +37,7 @@ Requires-Dist: unstructured-ingest[sharepoint]
37
37
  Requires-Dist: unstructured-ingest[slack]
38
38
  Requires-Dist: html5lib==1.1
39
39
  Requires-Dist: markdownify==0.14.1
40
- Requires-Dist: nebuly==0.3.35
40
+ Requires-Dist: nebuly==0.3.36
41
41
  Dynamic: author
42
42
  Dynamic: author-email
43
43
  Dynamic: classifier
@@ -185,10 +185,7 @@ py -m pip install --upgrade setuptools build twine streamlit
185
185
  ### 🪛 build
186
186
 
187
187
  ```pwsh
188
- if (Test-Path ./dist) {rm ./dist -r -force}; `
189
- cp .\requirements.txt .\ws_bom_robot_app\ && `
190
- py -m build && `
191
- twine check dist/*
188
+ py -m build && twine check dist/*
192
189
  ```
193
190
 
194
191
  ### 📦 test / 🧪 debugger
@@ -135,10 +135,7 @@ py -m pip install --upgrade setuptools build twine streamlit
135
135
  ### 🪛 build
136
136
 
137
137
  ```pwsh
138
- if (Test-Path ./dist) {rm ./dist -r -force}; `
139
- cp .\requirements.txt .\ws_bom_robot_app\ && `
140
- py -m build && `
141
- twine check dist/*
138
+ py -m build && twine check dist/*
142
139
  ```
143
140
 
144
141
  ### 📦 test / 🧪 debugger
@@ -1,5 +1,5 @@
1
1
  [build-system]
2
- requires = ["setuptools >= 64"]
2
+ requires = ["setuptools >= 75"]
3
3
  build-backend = "setuptools.build_meta"
4
4
 
5
5
  [tool.pytest.ini_options]
@@ -2,15 +2,15 @@
2
2
  standardwebhooks==1.0.0
3
3
  apscheduler==3.11.0
4
4
  aiofiles==24.1.0
5
- pydantic==2.10.3
6
- pydantic-settings==2.6.1
7
- fastapi[standard]==0.115.5
5
+ pydantic==2.10.5
6
+ pydantic-settings==2.7.1
7
+ fastapi[standard]==0.115.6
8
8
 
9
9
  #framework
10
- langchain==0.3.9
11
- langchain-openai==0.2.10
12
- langchain-community==0.3.8
13
- langchain-core==0.3.21
10
+ langchain==0.3.14
11
+ langchain-openai==0.3.0
12
+ langchain-community==0.3.14
13
+ langchain-core==0.3.29
14
14
 
15
15
  #vector DB
16
16
  faiss-cpu==1.9.0
@@ -20,11 +20,11 @@ faiss-cpu==1.9.0
20
20
  #loaders
21
21
  #python-magic==0.4.27
22
22
  #opencv-python-headless==4.10.0.84 #docker specs
23
- unstructured==0.16.11
23
+ unstructured==0.16.13
24
24
  unstructured[image]
25
25
  #unstructured[all-docs]==0.16.11
26
26
  #langchain_unstructured==0.1.5
27
- unstructured-ingest==0.3.8
27
+ unstructured-ingest==0.3.14
28
28
  unstructured-ingest[azure]
29
29
  unstructured-ingest[confluence]
30
30
  unstructured-ingest[dropbox]
@@ -42,4 +42,4 @@ html5lib==1.1 #beautifulsoup4 parser
42
42
  markdownify==0.14.1 #sitemap
43
43
 
44
44
  #telemetry
45
- nebuly==0.3.35
45
+ nebuly==0.3.36
@@ -1,8 +1,10 @@
1
1
  from setuptools import setup, find_packages
2
2
 
3
+ _requirements = [line.split('#')[0].strip() for line in open("requirements.txt").readlines() if all([line.strip(), not line.startswith("#")])]
4
+
3
5
  setup(
4
6
  name="ws_bom_robot_app",
5
- version="0.0.29",
7
+ version="0.0.31",
6
8
  description="A FastAPI application serving ws bom/robot/llm platform ai.",
7
9
  long_description=open("README.md", encoding='utf-8').read(),
8
10
  long_description_content_type="text/markdown",
@@ -10,7 +12,7 @@ setup(
10
12
  author_email="dev@websolute.it",
11
13
  url="https://github.com/websolutespa/bom",
12
14
  packages=find_packages(),
13
- install_requires=[line.split('#')[0].strip() for line in open("ws_bom_robot_app/requirements.txt").readlines() if all([line.strip(), not line.startswith("#")])],
15
+ install_requires=_requirements,
14
16
  classifiers=[
15
17
  "Programming Language :: Python :: 3",
16
18
  "License :: OSI Approved :: MIT License",
@@ -18,4 +20,3 @@ setup(
18
20
  ],
19
21
  python_requires=">=3.12",
20
22
  )
21
-
@@ -1,124 +1,124 @@
1
- import json, requests, re
2
- from typing import Any
3
- from abc import ABC, abstractmethod
4
- from langchain_openai import ChatOpenAI
5
- from langchain_core.prompts import ChatPromptTemplate
6
- from langchain_core.messages import AIMessage
7
- from langchain_core.runnables import RunnableSerializable
8
- from langchain_core.runnables import RunnableLambda
9
- from bs4 import BeautifulSoup
10
- from ws_bom_robot_app.llm.models.api import LlmRules
11
- from ws_bom_robot_app.llm.utils.agent_utils import get_rules
12
-
13
- # SafeDict helper class
14
- class SafeDict(dict):
15
- def __missing__(self, key):
16
- return ''
17
-
18
- # Strategy Interface
19
- class AgentDescriptorStrategy(ABC):
20
- @abstractmethod
21
- def enrich_prompt(self, prompt: str, input: dict) -> str:
22
- pass
23
-
24
- @abstractmethod
25
- def rule_input(self, input: dict) -> str:
26
- pass
27
-
28
- # Concrete Strategy for Default Agent
29
- class DefaultAgentDescriptor(AgentDescriptorStrategy):
30
- def enrich_prompt(self, prompt: str, input: dict) -> str:
31
- # Default enrichment logic (could be minimal or no-op)
32
- return prompt.format_map(SafeDict(input))
33
-
34
- def rule_input(self, input: dict) -> str:
35
- return input.get('content', "")
36
-
37
- # Concrete Strategy for URL2Text Agent
38
- class URL2TextAgentDescriptor(AgentDescriptorStrategy):
39
- def enrich_prompt(self, prompt: str, input: dict) -> str:
40
- input["context"] = self._get_page_text(input)
41
- return prompt.format_map(SafeDict(input))
42
-
43
- def rule_input(self, input: dict) -> str:
44
- return input.get('context', "")
45
-
46
- def _get_page_text(self, input: dict) -> str:
47
- url = input.get("content", "")
48
- exclusions = input.get("exclude", {})
49
- response = requests.get(url)
50
- response.raise_for_status()
51
- soup = BeautifulSoup(response.content, 'html5lib')
52
- classes_to_exclude = exclusions.get("classes", [])
53
- ids_to_exclude = exclusions.get("ids", [])
54
- for class_name in classes_to_exclude:
55
- for element in soup.find_all(class_=class_name):
56
- element.extract()
57
- for id_name in ids_to_exclude:
58
- for element in soup.find_all(id=id_name):
59
- element.extract()
60
- for script in soup(["script", "noscript", "style", "head", "footer", "iframe"]):
61
- script.extract()
62
- return re.sub(' +', ' ', soup.get_text())
63
-
64
-
65
- class AgentDescriptor:
66
- # Dictionary to hold all agent strategies
67
- _list: dict[str,AgentDescriptorStrategy] = {
68
- "default": DefaultAgentDescriptor(),
69
- "url2text": URL2TextAgentDescriptor(),
70
- }
71
-
72
- # Functions to manage strategies
73
- @staticmethod
74
- def add_strategy(name: str, strategy: AgentDescriptorStrategy):
75
- """_summary_
76
- add a new strategy to the dictionary
77
- Args:
78
- name (str): name of the strategy, in lowercase
79
- strategy (AgentDescriptorStrategy): class implementing the strategy
80
- Examples:
81
- AgentDescriptor.add_strategy("custom_agent_descriptor", CustomAgentDescriptor())
82
- """
83
- AgentDescriptor._list[name.lower()] = strategy
84
-
85
- @staticmethod
86
- def get_strategy(name: str) -> AgentDescriptorStrategy:
87
- return AgentDescriptor._list.get(name.lower(), DefaultAgentDescriptor())
88
-
89
- def __init__(self, api_key: str, prompt: str, mode: str, rules: LlmRules = None):
90
- self.__prompt = prompt
91
- self.__llm = ChatOpenAI(model="gpt-4o", temperature=0, api_key=api_key) # type: ignore
92
- self.api_key = api_key
93
- self.rules= rules
94
- self.strategy = self.get_strategy(mode) # Selects the strategy from the dictionary
95
-
96
- async def __create_prompt(self, input_dict: dict):
97
- input_data = json.loads(input_dict.get("input", {}))
98
- system = self.strategy.enrich_prompt(self.__prompt, input_data)
99
- if self.rules:
100
- rule_input = self.strategy.rule_input(input_data)
101
- rules_prompt = await get_rules(self.rules,self.api_key, rule_input)
102
- system += rules_prompt
103
- return ChatPromptTemplate.from_messages(
104
- [
105
- ("system", system),
106
- ("user", input_data.get("content", ""))
107
- ]
108
- )
109
-
110
- def __create_agent_descriptor(self, content) -> RunnableSerializable[Any, Any]:
111
- content = json.loads(content)
112
- agent = (
113
- {
114
- "input": lambda x: x["input"],
115
- }
116
- | RunnableLambda(self.__create_prompt)
117
- | self.__llm
118
- )
119
- return agent
120
-
121
- async def run_agent(self, content) -> Any:
122
- agent_descriptor = self.__create_agent_descriptor(content)
123
- response: AIMessage = await agent_descriptor.ainvoke({"input": content})
124
- return response
1
+ import json, requests, re
2
+ from typing import Any
3
+ from abc import ABC, abstractmethod
4
+ from langchain_openai import ChatOpenAI
5
+ from langchain_core.prompts import ChatPromptTemplate
6
+ from langchain_core.messages import AIMessage
7
+ from langchain_core.runnables import RunnableSerializable
8
+ from langchain_core.runnables import RunnableLambda
9
+ from bs4 import BeautifulSoup
10
+ from ws_bom_robot_app.llm.models.api import LlmRules
11
+ from ws_bom_robot_app.llm.utils.agent_utils import get_rules
12
+
13
+ # SafeDict helper class
14
+ class SafeDict(dict):
15
+ def __missing__(self, key):
16
+ return ''
17
+
18
+ # Strategy Interface
19
+ class AgentDescriptorStrategy(ABC):
20
+ @abstractmethod
21
+ def enrich_prompt(self, prompt: str, input: dict) -> str:
22
+ pass
23
+
24
+ @abstractmethod
25
+ def rule_input(self, input: dict) -> str:
26
+ pass
27
+
28
+ # Concrete Strategy for Default Agent
29
+ class DefaultAgentDescriptor(AgentDescriptorStrategy):
30
+ def enrich_prompt(self, prompt: str, input: dict) -> str:
31
+ # Default enrichment logic (could be minimal or no-op)
32
+ return prompt.format_map(SafeDict(input))
33
+
34
+ def rule_input(self, input: dict) -> str:
35
+ return input.get('content', "")
36
+
37
+ # Concrete Strategy for URL2Text Agent
38
+ class URL2TextAgentDescriptor(AgentDescriptorStrategy):
39
+ def enrich_prompt(self, prompt: str, input: dict) -> str:
40
+ input["context"] = self._get_page_text(input)
41
+ return prompt.format_map(SafeDict(input))
42
+
43
+ def rule_input(self, input: dict) -> str:
44
+ return input.get('context', "")
45
+
46
+ def _get_page_text(self, input: dict) -> str:
47
+ url = input.get("content", "")
48
+ exclusions = input.get("exclude", {})
49
+ response = requests.get(url)
50
+ response.raise_for_status()
51
+ soup = BeautifulSoup(response.content, 'html5lib')
52
+ classes_to_exclude = exclusions.get("classes", [])
53
+ ids_to_exclude = exclusions.get("ids", [])
54
+ for class_name in classes_to_exclude:
55
+ for element in soup.find_all(class_=class_name):
56
+ element.extract()
57
+ for id_name in ids_to_exclude:
58
+ for element in soup.find_all(id=id_name):
59
+ element.extract()
60
+ for script in soup(["script", "noscript", "style", "head", "footer", "iframe"]):
61
+ script.extract()
62
+ return re.sub(' +', ' ', soup.get_text())
63
+
64
+
65
+ class AgentDescriptor:
66
+ # Dictionary to hold all agent strategies
67
+ _list: dict[str,AgentDescriptorStrategy] = {
68
+ "default": DefaultAgentDescriptor(),
69
+ "url2text": URL2TextAgentDescriptor(),
70
+ }
71
+
72
+ # Functions to manage strategies
73
+ @staticmethod
74
+ def add_strategy(name: str, strategy: AgentDescriptorStrategy):
75
+ """_summary_
76
+ add a new strategy to the dictionary
77
+ Args:
78
+ name (str): name of the strategy, in lowercase
79
+ strategy (AgentDescriptorStrategy): class implementing the strategy
80
+ Examples:
81
+ AgentDescriptor.add_strategy("custom_agent_descriptor", CustomAgentDescriptor())
82
+ """
83
+ AgentDescriptor._list[name.lower()] = strategy
84
+
85
+ @staticmethod
86
+ def get_strategy(name: str) -> AgentDescriptorStrategy:
87
+ return AgentDescriptor._list.get(name.lower(), DefaultAgentDescriptor())
88
+
89
+ def __init__(self, api_key: str, prompt: str, mode: str, rules: LlmRules = None):
90
+ self.__prompt = prompt
91
+ self.__llm = ChatOpenAI(model="gpt-4o", temperature=0, api_key=api_key) # type: ignore
92
+ self.api_key = api_key
93
+ self.rules= rules
94
+ self.strategy = self.get_strategy(mode) # Selects the strategy from the dictionary
95
+
96
+ async def __create_prompt(self, input_dict: dict):
97
+ input_data = json.loads(input_dict.get("input", {}))
98
+ system = self.strategy.enrich_prompt(self.__prompt, input_data)
99
+ if self.rules:
100
+ rule_input = self.strategy.rule_input(input_data)
101
+ rules_prompt = await get_rules(self.rules,self.api_key, rule_input)
102
+ system += rules_prompt
103
+ return ChatPromptTemplate.from_messages(
104
+ [
105
+ ("system", system),
106
+ ("user", input_data.get("content", ""))
107
+ ]
108
+ )
109
+
110
+ def __create_agent_descriptor(self, content) -> RunnableSerializable[Any, Any]:
111
+ content = json.loads(content)
112
+ agent = (
113
+ {
114
+ "input": lambda x: x["input"],
115
+ }
116
+ | RunnableLambda(self.__create_prompt)
117
+ | self.__llm
118
+ )
119
+ return agent
120
+
121
+ async def run_agent(self, content) -> Any:
122
+ agent_descriptor = self.__create_agent_descriptor(content)
123
+ response: AIMessage = await agent_descriptor.ainvoke({"input": content})
124
+ return response