ws-bom-robot-app 0.0.60__tar.gz → 0.0.62__tar.gz

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (81)
  1. {ws_bom_robot_app-0.0.60/ws_bom_robot_app.egg-info → ws_bom_robot_app-0.0.62}/PKG-INFO +17 -17
  2. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/requirements.txt +18 -17
  3. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/setup.py +1 -1
  4. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/config.py +2 -3
  5. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/cron_manager.py +2 -2
  6. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/agent_description.py +123 -123
  7. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/agent_handler.py +177 -177
  8. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/agent_lcel.py +45 -46
  9. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/api.py +12 -0
  10. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/defaut_prompt.py +15 -15
  11. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/feedbacks/feedback_manager.py +66 -74
  12. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/main.py +134 -134
  13. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/models/api.py +6 -0
  14. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/models/feedback.py +30 -30
  15. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/nebuly_handler.py +182 -173
  16. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/settings.py +4 -4
  17. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/tools/models/main.py +4 -0
  18. ws_bom_robot_app-0.0.62/ws_bom_robot_app/llm/tools/tool_builder.py +65 -0
  19. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/tools/tool_manager.py +312 -228
  20. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/tools/utils.py +41 -41
  21. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/utils/agent.py +34 -34
  22. ws_bom_robot_app-0.0.62/ws_bom_robot_app/llm/utils/cms.py +77 -0
  23. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/utils/download.py +79 -79
  24. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/utils/print.py +29 -29
  25. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/vector_store/generator.py +137 -137
  26. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/vector_store/loader/json_loader.py +25 -25
  27. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/task_manager.py +3 -1
  28. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/util.py +59 -20
  29. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62/ws_bom_robot_app.egg-info}/PKG-INFO +17 -17
  30. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app.egg-info/SOURCES.txt +1 -0
  31. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app.egg-info/requires.txt +16 -16
  32. ws_bom_robot_app-0.0.60/ws_bom_robot_app/llm/tools/tool_builder.py +0 -23
  33. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/MANIFEST.in +0 -0
  34. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/README.md +0 -0
  35. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/pyproject.toml +0 -0
  36. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/setup.cfg +0 -0
  37. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/__init__.py +0 -0
  38. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/auth.py +0 -0
  39. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/__init__.py +0 -0
  40. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/agent_context.py +0 -0
  41. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/feedbacks/__init__.py +0 -0
  42. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/models/__init__.py +0 -0
  43. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/models/base.py +0 -0
  44. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/models/kb.py +0 -0
  45. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/providers/__init__.py +0 -0
  46. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/providers/llm_manager.py +0 -0
  47. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/tools/__init__.py +0 -0
  48. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/tools/models/__init__.py +0 -0
  49. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/utils/__init__.py +0 -0
  50. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/utils/chunker.py +0 -0
  51. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/utils/kb.py +0 -0
  52. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/utils/secrets.py +0 -0
  53. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/utils/webhooks.py +0 -0
  54. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/vector_store/__init__.py +0 -0
  55. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/vector_store/db/__init__.py +0 -0
  56. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/vector_store/db/base.py +0 -0
  57. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/vector_store/db/chroma.py +0 -0
  58. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/vector_store/db/faiss.py +0 -0
  59. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/vector_store/db/manager.py +0 -0
  60. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/vector_store/db/qdrant.py +0 -0
  61. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/vector_store/integration/__init__.py +0 -0
  62. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/vector_store/integration/azure.py +0 -0
  63. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/vector_store/integration/base.py +0 -0
  64. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/vector_store/integration/confluence.py +0 -0
  65. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/vector_store/integration/dropbox.py +0 -0
  66. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/vector_store/integration/gcs.py +0 -0
  67. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/vector_store/integration/github.py +0 -0
  68. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/vector_store/integration/googledrive.py +0 -0
  69. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/vector_store/integration/jira.py +0 -0
  70. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/vector_store/integration/manager.py +0 -0
  71. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/vector_store/integration/s3.py +0 -0
  72. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/vector_store/integration/sftp.py +0 -0
  73. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/vector_store/integration/sharepoint.py +0 -0
  74. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/vector_store/integration/sitemap.py +0 -0
  75. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/vector_store/integration/slack.py +0 -0
  76. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/vector_store/loader/__init__.py +0 -0
  77. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/vector_store/loader/base.py +0 -0
  78. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/llm/vector_store/loader/docling.py +0 -0
  79. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app/main.py +0 -0
  80. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app.egg-info/dependency_links.txt +0 -0
  81. {ws_bom_robot_app-0.0.60 → ws_bom_robot_app-0.0.62}/ws_bom_robot_app.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ws_bom_robot_app
3
- Version: 0.0.60
3
+ Version: 0.0.62
4
4
  Summary: A FastAPI application serving ws bom/robot/llm platform ai.
5
5
  Home-page: https://github.com/websolutespa/bom
6
6
  Author: Websolute Spa
@@ -13,24 +13,23 @@ Description-Content-Type: text/markdown
13
13
  Requires-Dist: standardwebhooks==1.0.0
14
14
  Requires-Dist: apscheduler==3.11.0
15
15
  Requires-Dist: aiofiles==24.1.0
16
- Requires-Dist: pydantic==2.10.6
17
- Requires-Dist: pydantic-settings==2.7.1
18
- Requires-Dist: fastapi[standard]==0.115.8
16
+ Requires-Dist: pydantic==2.11.7
17
+ Requires-Dist: pydantic-settings==2.10.1
18
+ Requires-Dist: fastapi[standard]==0.115.14
19
19
  Requires-Dist: chevron==0.14.0
20
- Requires-Dist: trafilatura==2.0.0
21
- Requires-Dist: langchain==0.3.25
22
- Requires-Dist: langchain-community==0.3.24
23
- Requires-Dist: langchain-core==0.3.59
24
- Requires-Dist: langchain-openai==0.3.16
20
+ Requires-Dist: langchain==0.3.26
21
+ Requires-Dist: langchain-community==0.3.26
22
+ Requires-Dist: langchain-core==0.3.67
23
+ Requires-Dist: langchain-openai==0.3.27
25
24
  Requires-Dist: langchain-anthropic==0.3.6
26
25
  Requires-Dist: langchain-google-genai==2.0.7
27
- Requires-Dist: langchain-google-vertexai==2.0.13
28
- Requires-Dist: langchain-groq==0.3.2
29
- Requires-Dist: langchain-ollama==0.3.2
30
- Requires-Dist: faiss-cpu==1.9.0
31
- Requires-Dist: chromadb==0.6.3
32
- Requires-Dist: langchain_chroma==0.2.1
33
- Requires-Dist: fastembed==0.5.1
26
+ Requires-Dist: langchain-google-vertexai==2.0.27
27
+ Requires-Dist: langchain-groq==0.3.5
28
+ Requires-Dist: langchain-ollama==0.3.3
29
+ Requires-Dist: faiss-cpu==1.11.0
30
+ Requires-Dist: chromadb==1.0.13
31
+ Requires-Dist: langchain_chroma==0.2.4
32
+ Requires-Dist: fastembed==0.7.1
34
33
  Requires-Dist: langchain-qdrant==0.2.0
35
34
  Requires-Dist: lark==1.2.2
36
35
  Requires-Dist: unstructured==0.16.21
@@ -48,9 +47,10 @@ Requires-Dist: unstructured-ingest[sftp]
48
47
  Requires-Dist: unstructured-ingest[sharepoint]
49
48
  Requires-Dist: unstructured-ingest[slack]
50
49
  Requires-Dist: html5lib==1.1
51
- Requires-Dist: markdownify==0.14.1
50
+ Requires-Dist: markdownify==1.1.0
52
51
  Requires-Dist: duckduckgo-search==8.0.4
53
52
  Requires-Dist: langchain_google_community==2.0.7
53
+ Requires-Dist: trafilatura==2.0.0
54
54
  Dynamic: author
55
55
  Dynamic: author-email
56
56
  Dynamic: classifier
@@ -2,32 +2,30 @@
2
2
  standardwebhooks==1.0.0
3
3
  apscheduler==3.11.0
4
4
  aiofiles==24.1.0
5
- pydantic==2.10.6
6
- pydantic-settings==2.7.1
7
- fastapi[standard]==0.115.8
5
+ pydantic==2.11.7
6
+ pydantic-settings==2.10.1
7
+ fastapi[standard]==0.115.14
8
8
  chevron==0.14.0
9
- trafilatura==2.0.0
10
9
 
11
10
  #framework
12
- langchain==0.3.25
13
- langchain-community==0.3.24
14
- langchain-core==0.3.59
15
- langchain-openai==0.3.16
11
+ langchain==0.3.26
12
+ langchain-community==0.3.26
13
+ langchain-core==0.3.67
14
+ langchain-openai==0.3.27
16
15
  langchain-anthropic==0.3.6 #issue get_models() from 0.3.7
17
16
  langchain-google-genai==2.0.7 #waiting for new release: https://github.com/langchain-ai/langchain-google/issues/711
18
- langchain-google-vertexai==2.0.13
19
- langchain-groq==0.3.2
20
- langchain-ollama==0.3.2
17
+ langchain-google-vertexai==2.0.27
18
+ langchain-groq==0.3.5
19
+ langchain-ollama==0.3.3
21
20
 
22
21
  #vector DB
23
- faiss-cpu==1.9.0
24
- chromadb==0.6.3
25
- langchain_chroma==0.2.1
26
- fastembed==0.5.1 #qdrant sparse embedding
22
+ faiss-cpu==1.11.0
23
+ chromadb==1.0.13
24
+ langchain_chroma==0.2.4
25
+ fastembed==0.7.1 #qdrant sparse embedding
27
26
  langchain-qdrant==0.2.0
28
27
  lark==1.2.2 #self-query retriever
29
28
 
30
-
31
29
  #loaders
32
30
  unstructured==0.16.21
33
31
  unstructured[image]
@@ -46,6 +44,9 @@ unstructured-ingest[slack]
46
44
  html5lib==1.1 #beautifulsoup4 parser
47
45
 
48
46
  #integrations
49
- markdownify==0.14.1 #sitemap
47
+ markdownify==1.1.0 #sitemap
48
+
49
+ ##tools
50
50
  duckduckgo-search==8.0.4
51
51
  langchain_google_community==2.0.7
52
+ trafilatura==2.0.0
@@ -4,7 +4,7 @@ _requirements = [line.split('#')[0].strip() for line in open("requirements.txt")
4
4
 
5
5
  setup(
6
6
  name="ws_bom_robot_app",
7
- version="0.0.60",
7
+ version="0.0.62",
8
8
  description="A FastAPI application serving ws bom/robot/llm platform ai.",
9
9
  long_description=open("README.md", encoding='utf-8').read(),
10
10
  long_description_content_type="text/markdown",
@@ -4,6 +4,7 @@ from pydantic_settings import BaseSettings
4
4
  import os
5
5
 
6
6
  class Settings(BaseSettings):
7
+ USER_AGENT: str = 'ws-bom-robot'
7
8
  robot_env: str = 'local'
8
9
  robot_user: str = 'user'
9
10
  robot_password: str = 'password'
@@ -28,7 +29,6 @@ class Settings(BaseSettings):
28
29
  GOOGLE_API_KEY: str = ''
29
30
  NEBULY_API_URL: str =''
30
31
  GOOGLE_APPLICATION_CREDENTIALS: str = '' # path to google credentials iam file, e.d. ./.secrets/google-credentials.json
31
- TAVILY_API_KEY: str = '' #TODO DELETE
32
32
  model_config = ConfigDict(
33
33
  env_file='./.env',
34
34
  extra='ignore',
@@ -36,6 +36,7 @@ class Settings(BaseSettings):
36
36
  )
37
37
  def __init__(self, **kwargs):
38
38
  super().__init__(**kwargs)
39
+ os.environ["USER_AGENT"] = self.USER_AGENT
39
40
  os.environ["OPENAI_API_KEY"] = self.OPENAI_API_KEY
40
41
  os.environ["OLLAMA_API_URL"] = self.OLLAMA_API_URL
41
42
  os.environ["ANTHROPIC_API_KEY"] = self.ANTHROPIC_API_KEY
@@ -44,8 +45,6 @@ class Settings(BaseSettings):
44
45
  os.environ["GOOGLE_API_KEY"] = self.GOOGLE_API_KEY
45
46
  os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = self.GOOGLE_APPLICATION_CREDENTIALS
46
47
  os.environ["NEBULY_API_URL"] = self.NEBULY_API_URL
47
- # TODO DELETE
48
- os.environ["TAVILY_API_KEY"] = self.TAVILY_API_KEY
49
48
 
50
49
  class RuntimeOptions(BaseModel):
51
50
  @staticmethod
@@ -59,14 +59,14 @@ class CronManager:
59
59
  Job('cleanup-task',task_manager.cleanup_task, interval=5 * 60),
60
60
  Job('cleanup-data',kb_cleanup_data_file, interval=180 * 60),
61
61
  ]
62
- def __get_jobstore_strategy() -> JobstoreStrategy:
62
+ def __get_jobstore_strategy(self) -> JobstoreStrategy:
63
63
  if True or config.runtime_options().is_multi_process:
64
64
  return MemoryJobstoreStrategy()
65
65
  return PersistentJobstoreStrategy()
66
66
  def __init__(self, strategy: JobstoreStrategy = None, enable_defaults: bool = True):
67
67
  self.enable_defaults = enable_defaults
68
68
  if strategy is None:
69
- strategy = CronManager.__get_jobstore_strategy()
69
+ strategy = self.__get_jobstore_strategy()
70
70
  jobstores = strategy.get_jobstore()
71
71
  self.scheduler: BackgroundScheduler = BackgroundScheduler(jobstores=jobstores)
72
72
  self.__scheduler_is_running = False
@@ -1,123 +1,123 @@
1
- import json, requests, re
2
- from typing import Any
3
- from abc import ABC, abstractmethod
4
- from langchain_core.prompts import ChatPromptTemplate
5
- from langchain_core.messages import AIMessage
6
- from langchain_core.runnables import RunnableSerializable
7
- from langchain_core.runnables import RunnableLambda
8
- from bs4 import BeautifulSoup
9
- from ws_bom_robot_app.llm.models.api import LlmRules
10
- from ws_bom_robot_app.llm.providers.llm_manager import LlmInterface
11
- from ws_bom_robot_app.llm.utils.agent import get_rules
12
-
13
- # SafeDict helper class
14
- class SafeDict(dict):
15
- def __missing__(self, key):
16
- return ''
17
-
18
- # Strategy Interface
19
- class AgentDescriptorStrategy(ABC):
20
- @abstractmethod
21
- def enrich_prompt(self, prompt: str, input: dict) -> str:
22
- pass
23
-
24
- @abstractmethod
25
- def rule_input(self, input: dict) -> str:
26
- pass
27
-
28
- # Concrete Strategy for Default Agent
29
- class DefaultAgentDescriptor(AgentDescriptorStrategy):
30
- def enrich_prompt(self, prompt: str, input: dict) -> str:
31
- # Default enrichment logic (could be minimal or no-op)
32
- return prompt.format_map(SafeDict(input))
33
-
34
- def rule_input(self, input: dict) -> str:
35
- return input.get('content', "")
36
-
37
- # Concrete Strategy for URL2Text Agent
38
- class URL2TextAgentDescriptor(AgentDescriptorStrategy):
39
- def enrich_prompt(self, prompt: str, input: dict) -> str:
40
- input["context"] = self._get_page_text(input)
41
- return prompt.format_map(SafeDict(input))
42
-
43
- def rule_input(self, input: dict) -> str:
44
- return input.get('context', "")
45
-
46
- def _get_page_text(self, input: dict) -> str:
47
- url = input.get("content", "")
48
- exclusions = input.get("exclude", {})
49
- response = requests.get(url)
50
- response.raise_for_status()
51
- soup = BeautifulSoup(response.content, 'html5lib')
52
- classes_to_exclude = exclusions.get("classes", [])
53
- ids_to_exclude = exclusions.get("ids", [])
54
- for class_name in classes_to_exclude:
55
- for element in soup.find_all(class_=class_name):
56
- element.extract()
57
- for id_name in ids_to_exclude:
58
- for element in soup.find_all(id=id_name):
59
- element.extract()
60
- for script in soup(["script", "noscript", "style", "head", "footer", "iframe"]):
61
- script.extract()
62
- return re.sub(' +', ' ', soup.get_text())
63
-
64
-
65
- class AgentDescriptor:
66
- # Dictionary to hold all agent strategies
67
- _list: dict[str,AgentDescriptorStrategy] = {
68
- "default": DefaultAgentDescriptor(),
69
- "url2text": URL2TextAgentDescriptor(),
70
- }
71
-
72
- # Functions to manage strategies
73
- @staticmethod
74
- def add_strategy(name: str, strategy: AgentDescriptorStrategy):
75
- """_summary_
76
- add a new strategy to the dictionary
77
- Args:
78
- name (str): name of the strategy, in lowercase
79
- strategy (AgentDescriptorStrategy): class implementing the strategy
80
- Examples:
81
- AgentDescriptor.add_strategy("custom_agent_descriptor", CustomAgentDescriptor())
82
- """
83
- AgentDescriptor._list[name.lower()] = strategy
84
-
85
- @staticmethod
86
- def get_strategy(name: str) -> AgentDescriptorStrategy:
87
- return AgentDescriptor._list.get(name.lower(), DefaultAgentDescriptor())
88
-
89
- def __init__(self, llm: LlmInterface, prompt: str, mode: str, rules: LlmRules = None):
90
- self.__prompt = prompt
91
- self.__llm = llm
92
- self.rules= rules
93
- self.strategy = self.get_strategy(mode) # Selects the strategy from the dictionary
94
-
95
- async def __create_prompt(self, input_dict: dict):
96
- input_data = json.loads(input_dict.get("input", {}))
97
- system = self.strategy.enrich_prompt(self.__prompt, input_data)
98
- if self.rules:
99
- rule_input = self.strategy.rule_input(input_data)
100
- rules_prompt = await get_rules(self.__llm.get_embeddings(), self.rules, rule_input)
101
- system += rules_prompt
102
- return ChatPromptTemplate.from_messages(
103
- [
104
- ("system", system),
105
- ("user", input_data.get("content", ""))
106
- ]
107
- )
108
-
109
- def __create_agent_descriptor(self, content) -> RunnableSerializable[Any, Any]:
110
- content = json.loads(content)
111
- agent = (
112
- {
113
- "input": lambda x: x["input"],
114
- }
115
- | RunnableLambda(self.__create_prompt)
116
- | self.__llm.get_llm()
117
- )
118
- return agent
119
-
120
- async def run_agent(self, content) -> Any:
121
- agent_descriptor = self.__create_agent_descriptor(content)
122
- response: AIMessage = await agent_descriptor.ainvoke({"input": content})
123
- return response
1
+ import json, requests, re
2
+ from typing import Any
3
+ from abc import ABC, abstractmethod
4
+ from langchain_core.prompts import ChatPromptTemplate
5
+ from langchain_core.messages import AIMessage
6
+ from langchain_core.runnables import RunnableSerializable
7
+ from langchain_core.runnables import RunnableLambda
8
+ from bs4 import BeautifulSoup
9
+ from ws_bom_robot_app.llm.models.api import LlmRules
10
+ from ws_bom_robot_app.llm.providers.llm_manager import LlmInterface
11
+ from ws_bom_robot_app.llm.utils.agent import get_rules
12
+
13
+ # SafeDict helper class
14
+ class SafeDict(dict):
15
+ def __missing__(self, key):
16
+ return ''
17
+
18
+ # Strategy Interface
19
+ class AgentDescriptorStrategy(ABC):
20
+ @abstractmethod
21
+ def enrich_prompt(self, prompt: str, input: dict) -> str:
22
+ pass
23
+
24
+ @abstractmethod
25
+ def rule_input(self, input: dict) -> str:
26
+ pass
27
+
28
+ # Concrete Strategy for Default Agent
29
+ class DefaultAgentDescriptor(AgentDescriptorStrategy):
30
+ def enrich_prompt(self, prompt: str, input: dict) -> str:
31
+ # Default enrichment logic (could be minimal or no-op)
32
+ return prompt.format_map(SafeDict(input))
33
+
34
+ def rule_input(self, input: dict) -> str:
35
+ return input.get('content', "")
36
+
37
+ # Concrete Strategy for URL2Text Agent
38
+ class URL2TextAgentDescriptor(AgentDescriptorStrategy):
39
+ def enrich_prompt(self, prompt: str, input: dict) -> str:
40
+ input["context"] = self._get_page_text(input)
41
+ return prompt.format_map(SafeDict(input))
42
+
43
+ def rule_input(self, input: dict) -> str:
44
+ return input.get('context', "")
45
+
46
+ def _get_page_text(self, input: dict) -> str:
47
+ url = input.get("content", "")
48
+ exclusions = input.get("exclude", {})
49
+ response = requests.get(url)
50
+ response.raise_for_status()
51
+ soup = BeautifulSoup(response.content, 'html5lib')
52
+ classes_to_exclude = exclusions.get("classes", [])
53
+ ids_to_exclude = exclusions.get("ids", [])
54
+ for class_name in classes_to_exclude:
55
+ for element in soup.find_all(class_=class_name):
56
+ element.extract()
57
+ for id_name in ids_to_exclude:
58
+ for element in soup.find_all(id=id_name):
59
+ element.extract()
60
+ for script in soup(["script", "noscript", "style", "head", "footer", "iframe"]):
61
+ script.extract()
62
+ return re.sub(' +', ' ', soup.get_text())
63
+
64
+
65
+ class AgentDescriptor:
66
+ # Dictionary to hold all agent strategies
67
+ _list: dict[str,AgentDescriptorStrategy] = {
68
+ "default": DefaultAgentDescriptor(),
69
+ "url2text": URL2TextAgentDescriptor(),
70
+ }
71
+
72
+ # Functions to manage strategies
73
+ @staticmethod
74
+ def add_strategy(name: str, strategy: AgentDescriptorStrategy):
75
+ """_summary_
76
+ add a new strategy to the dictionary
77
+ Args:
78
+ name (str): name of the strategy, in lowercase
79
+ strategy (AgentDescriptorStrategy): class implementing the strategy
80
+ Examples:
81
+ AgentDescriptor.add_strategy("custom_agent_descriptor", CustomAgentDescriptor())
82
+ """
83
+ AgentDescriptor._list[name.lower()] = strategy
84
+
85
+ @staticmethod
86
+ def get_strategy(name: str) -> AgentDescriptorStrategy:
87
+ return AgentDescriptor._list.get(name.lower(), DefaultAgentDescriptor())
88
+
89
+ def __init__(self, llm: LlmInterface, prompt: str, mode: str, rules: LlmRules = None):
90
+ self.__prompt = prompt
91
+ self.__llm = llm
92
+ self.rules= rules
93
+ self.strategy = self.get_strategy(mode) # Selects the strategy from the dictionary
94
+
95
+ async def __create_prompt(self, input_dict: dict):
96
+ input_data = json.loads(input_dict.get("input", {}))
97
+ system = self.strategy.enrich_prompt(self.__prompt, input_data)
98
+ if self.rules:
99
+ rule_input = self.strategy.rule_input(input_data)
100
+ rules_prompt = await get_rules(self.__llm.get_embeddings(), self.rules, rule_input)
101
+ system += rules_prompt
102
+ return ChatPromptTemplate.from_messages(
103
+ [
104
+ ("system", system),
105
+ ("user", input_data.get("content", ""))
106
+ ]
107
+ )
108
+
109
+ def __create_agent_descriptor(self, content) -> RunnableSerializable[Any, Any]:
110
+ content = json.loads(content)
111
+ agent = (
112
+ {
113
+ "input": lambda x: x["input"],
114
+ }
115
+ | RunnableLambda(self.__create_prompt)
116
+ | self.__llm.get_llm()
117
+ )
118
+ return agent
119
+
120
+ async def run_agent(self, content) -> Any:
121
+ agent_descriptor = self.__create_agent_descriptor(content)
122
+ response: AIMessage = await agent_descriptor.ainvoke({"input": content})
123
+ return response