ws-bom-robot-app 0.0.37__py3-none-any.whl → 0.0.103__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ws_bom_robot_app/config.py +35 -7
- ws_bom_robot_app/cron_manager.py +15 -14
- ws_bom_robot_app/llm/agent_context.py +26 -0
- ws_bom_robot_app/llm/agent_description.py +123 -123
- ws_bom_robot_app/llm/agent_handler.py +176 -180
- ws_bom_robot_app/llm/agent_lcel.py +107 -54
- ws_bom_robot_app/llm/api.py +100 -7
- ws_bom_robot_app/llm/defaut_prompt.py +15 -15
- ws_bom_robot_app/llm/evaluator.py +319 -0
- ws_bom_robot_app/llm/feedbacks/__init__.py +0 -0
- ws_bom_robot_app/llm/feedbacks/feedback_manager.py +66 -0
- ws_bom_robot_app/llm/main.py +159 -110
- ws_bom_robot_app/llm/models/api.py +70 -5
- ws_bom_robot_app/llm/models/feedback.py +30 -0
- ws_bom_robot_app/llm/nebuly_handler.py +185 -0
- ws_bom_robot_app/llm/providers/llm_manager.py +244 -80
- ws_bom_robot_app/llm/tools/models/main.py +8 -0
- ws_bom_robot_app/llm/tools/tool_builder.py +68 -23
- ws_bom_robot_app/llm/tools/tool_manager.py +343 -133
- ws_bom_robot_app/llm/tools/utils.py +41 -25
- ws_bom_robot_app/llm/utils/agent.py +34 -0
- ws_bom_robot_app/llm/utils/chunker.py +6 -1
- ws_bom_robot_app/llm/utils/cleanup.py +81 -0
- ws_bom_robot_app/llm/utils/cms.py +123 -0
- ws_bom_robot_app/llm/utils/download.py +183 -79
- ws_bom_robot_app/llm/utils/print.py +29 -29
- ws_bom_robot_app/llm/vector_store/db/__init__.py +0 -0
- ws_bom_robot_app/llm/vector_store/db/base.py +193 -0
- ws_bom_robot_app/llm/vector_store/db/chroma.py +97 -0
- ws_bom_robot_app/llm/vector_store/db/faiss.py +91 -0
- ws_bom_robot_app/llm/vector_store/db/manager.py +15 -0
- ws_bom_robot_app/llm/vector_store/db/qdrant.py +73 -0
- ws_bom_robot_app/llm/vector_store/generator.py +137 -137
- ws_bom_robot_app/llm/vector_store/integration/api.py +216 -0
- ws_bom_robot_app/llm/vector_store/integration/azure.py +1 -1
- ws_bom_robot_app/llm/vector_store/integration/base.py +58 -15
- ws_bom_robot_app/llm/vector_store/integration/confluence.py +41 -11
- ws_bom_robot_app/llm/vector_store/integration/dropbox.py +1 -1
- ws_bom_robot_app/llm/vector_store/integration/gcs.py +1 -1
- ws_bom_robot_app/llm/vector_store/integration/github.py +22 -22
- ws_bom_robot_app/llm/vector_store/integration/googledrive.py +46 -17
- ws_bom_robot_app/llm/vector_store/integration/jira.py +112 -75
- ws_bom_robot_app/llm/vector_store/integration/manager.py +6 -2
- ws_bom_robot_app/llm/vector_store/integration/s3.py +1 -1
- ws_bom_robot_app/llm/vector_store/integration/sftp.py +1 -1
- ws_bom_robot_app/llm/vector_store/integration/sharepoint.py +7 -14
- ws_bom_robot_app/llm/vector_store/integration/shopify.py +143 -0
- ws_bom_robot_app/llm/vector_store/integration/sitemap.py +9 -1
- ws_bom_robot_app/llm/vector_store/integration/slack.py +3 -2
- ws_bom_robot_app/llm/vector_store/integration/thron.py +236 -0
- ws_bom_robot_app/llm/vector_store/loader/base.py +52 -8
- ws_bom_robot_app/llm/vector_store/loader/docling.py +71 -33
- ws_bom_robot_app/llm/vector_store/loader/json_loader.py +25 -25
- ws_bom_robot_app/main.py +148 -146
- ws_bom_robot_app/subprocess_runner.py +106 -0
- ws_bom_robot_app/task_manager.py +207 -54
- ws_bom_robot_app/util.py +65 -20
- ws_bom_robot_app-0.0.103.dist-info/METADATA +364 -0
- ws_bom_robot_app-0.0.103.dist-info/RECORD +76 -0
- {ws_bom_robot_app-0.0.37.dist-info → ws_bom_robot_app-0.0.103.dist-info}/WHEEL +1 -1
- ws_bom_robot_app/llm/settings.py +0 -4
- ws_bom_robot_app/llm/utils/agent_utils.py +0 -17
- ws_bom_robot_app/llm/utils/kb.py +0 -34
- ws_bom_robot_app-0.0.37.dist-info/METADATA +0 -277
- ws_bom_robot_app-0.0.37.dist-info/RECORD +0 -60
- {ws_bom_robot_app-0.0.37.dist-info → ws_bom_robot_app-0.0.103.dist-info}/top_level.txt +0 -0
ws_bom_robot_app/config.py
CHANGED
|
@@ -4,6 +4,7 @@ from pydantic_settings import BaseSettings
|
|
|
4
4
|
import os
|
|
5
5
|
|
|
6
6
|
class Settings(BaseSettings):
|
|
7
|
+
USER_AGENT: str = 'ws-bom-robot'
|
|
7
8
|
robot_env: str = 'local'
|
|
8
9
|
robot_user: str = 'user'
|
|
9
10
|
robot_password: str = 'password'
|
|
@@ -13,19 +14,34 @@ class Settings(BaseSettings):
|
|
|
13
14
|
robot_data_db_folder_out: str = 'out'
|
|
14
15
|
robot_data_db_folder_store: str = 'store'
|
|
15
16
|
robot_data_db_retention_days: float = 60
|
|
17
|
+
robot_data_attachment_folder: str = 'attachment'
|
|
18
|
+
robot_data_attachment_retention_days: float = 1
|
|
19
|
+
robot_ingest_max_threads: int = 1 # safe choice to 1, avoid potential process-related issues with Docker
|
|
16
20
|
robot_loader_max_threads: int = 1
|
|
17
21
|
robot_task_max_total_parallelism: int = 2 * (os.cpu_count() or 1)
|
|
18
22
|
robot_task_retention_days: float = 1
|
|
23
|
+
robot_task_strategy: str = 'memory' # memory / db
|
|
24
|
+
robot_task_mp_enable: bool = True
|
|
25
|
+
robot_task_mp_method: str = 'spawn' # spawn / fork
|
|
26
|
+
robot_task_mp_max_retries: int = 1
|
|
27
|
+
robot_task_mp_retry_delay: float = 60 # seconds
|
|
28
|
+
robot_cron_strategy: str = 'memory' # memory / db
|
|
19
29
|
robot_cms_host: str = ''
|
|
20
30
|
robot_cms_auth: str = ''
|
|
21
31
|
robot_cms_db_folder: str = 'llmVectorDb'
|
|
22
32
|
robot_cms_kb_folder: str ='llmKbFile'
|
|
23
33
|
ANTHROPIC_API_KEY: str = ''
|
|
34
|
+
DEEPSEEK_API_KEY: str = ''
|
|
24
35
|
OPENAI_API_KEY: str = '' # used also for saas dall-e api
|
|
25
36
|
OLLAMA_API_URL: str = 'http://localhost:11434'
|
|
26
37
|
GROQ_API_KEY: str = ''
|
|
27
38
|
GOOGLE_API_KEY: str = ''
|
|
28
39
|
GOOGLE_APPLICATION_CREDENTIALS: str = '' # path to google credentials iam file, e.d. ./.secrets/google-credentials.json
|
|
40
|
+
WATSONX_URL: str = ''
|
|
41
|
+
WATSONX_APIKEY: str = ''
|
|
42
|
+
WATSONX_PROJECTID: str = ''
|
|
43
|
+
NEBULY_API_URL: str ='https://backend.nebuly.com/'
|
|
44
|
+
LANGSMITH_API_KEY: str = '' # app-wide api key to run evaluation
|
|
29
45
|
model_config = ConfigDict(
|
|
30
46
|
env_file='./.env',
|
|
31
47
|
extra='ignore',
|
|
@@ -33,16 +49,28 @@ class Settings(BaseSettings):
|
|
|
33
49
|
)
|
|
34
50
|
def __init__(self, **kwargs):
|
|
35
51
|
super().__init__(**kwargs)
|
|
52
|
+
# env
|
|
53
|
+
os.environ["USER_AGENT"] = self.USER_AGENT
|
|
36
54
|
os.environ["OPENAI_API_KEY"] = self.OPENAI_API_KEY
|
|
37
55
|
os.environ["OLLAMA_API_URL"] = self.OLLAMA_API_URL
|
|
38
56
|
os.environ["ANTHROPIC_API_KEY"] = self.ANTHROPIC_API_KEY
|
|
57
|
+
os.environ["DEEPSEEK_API_KEY"] = self.DEEPSEEK_API_KEY
|
|
39
58
|
os.environ["GROQ_API_KEY"] = self.GROQ_API_KEY
|
|
40
59
|
os.environ["GOOGLE_API_KEY"] = self.GOOGLE_API_KEY
|
|
41
60
|
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = self.GOOGLE_APPLICATION_CREDENTIALS
|
|
61
|
+
os.environ["WATSONX_URL"] = self.WATSONX_URL
|
|
62
|
+
os.environ["WATSONX_APIKEY"] = self.WATSONX_APIKEY
|
|
63
|
+
os.environ["WATSONX_PROJECTID"] = self.WATSONX_PROJECTID
|
|
64
|
+
os.environ["NEBULY_API_URL"] = self.NEBULY_API_URL
|
|
65
|
+
os.environ["LANGSMITH_API_KEY"] = self.LANGSMITH_API_KEY
|
|
66
|
+
# dir
|
|
67
|
+
os.makedirs(self.robot_data_folder, exist_ok=True)
|
|
68
|
+
for subfolder in [self.robot_data_db_folder, self.robot_data_attachment_folder, 'db']:
|
|
69
|
+
os.makedirs(os.path.join(self.robot_data_folder, subfolder), exist_ok=True)
|
|
42
70
|
|
|
43
71
|
class RuntimeOptions(BaseModel):
|
|
44
72
|
@staticmethod
|
|
45
|
-
def
|
|
73
|
+
def _get_sys_arg(arg: str, default: int) -> int:
|
|
46
74
|
"""
|
|
47
75
|
Returns the number of worker processes to use for the application.
|
|
48
76
|
|
|
@@ -60,18 +88,18 @@ class Settings(BaseSettings):
|
|
|
60
88
|
"""
|
|
61
89
|
import sys
|
|
62
90
|
try:
|
|
63
|
-
for i,
|
|
64
|
-
if
|
|
91
|
+
for i, argv in enumerate(sys.argv):
|
|
92
|
+
if argv == f"--{arg}" and i + 1 < len(sys.argv):
|
|
65
93
|
return int(sys.argv[i + 1])
|
|
66
94
|
except (ValueError, IndexError):
|
|
67
95
|
pass
|
|
68
|
-
return
|
|
96
|
+
return default
|
|
69
97
|
debug: bool
|
|
98
|
+
tcp_port: int = _get_sys_arg("port", 6001)
|
|
70
99
|
loader_show_progress: bool
|
|
71
100
|
loader_silent_errors: bool
|
|
72
|
-
number_of_workers: int =
|
|
73
|
-
is_multi_process: bool =
|
|
74
|
-
|
|
101
|
+
number_of_workers: int = _get_sys_arg("workers", 1)
|
|
102
|
+
is_multi_process: bool = _get_sys_arg("workers", 1) > 1
|
|
75
103
|
|
|
76
104
|
def runtime_options(self) -> RuntimeOptions:
|
|
77
105
|
"""_summary_
|
ws_bom_robot_app/cron_manager.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import os
|
|
1
2
|
from apscheduler.schedulers.background import BackgroundScheduler
|
|
2
3
|
#from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
|
3
4
|
from apscheduler.jobstores.memory import MemoryJobStore
|
|
@@ -7,8 +8,7 @@ from apscheduler.triggers.interval import IntervalTrigger
|
|
|
7
8
|
from apscheduler.triggers.date import DateTrigger
|
|
8
9
|
from fastapi import APIRouter
|
|
9
10
|
from datetime import datetime
|
|
10
|
-
from ws_bom_robot_app.
|
|
11
|
-
from ws_bom_robot_app.llm.utils.kb import kb_cleanup_data_file
|
|
11
|
+
from ws_bom_robot_app.llm.utils.cleanup import kb_cleanup_data_file, chat_cleanup_attachment, task_cleanup_history
|
|
12
12
|
from ws_bom_robot_app.util import _log
|
|
13
13
|
from ws_bom_robot_app.config import config
|
|
14
14
|
|
|
@@ -22,8 +22,8 @@ class MemoryJobstoreStrategy(JobstoreStrategy):
|
|
|
22
22
|
return {"default": MemoryJobStore()}
|
|
23
23
|
|
|
24
24
|
class PersistentJobstoreStrategy(JobstoreStrategy):
|
|
25
|
-
def get_jobstore(self, db_url: str = "sqlite
|
|
26
|
-
_log.info(f"Using persistent
|
|
25
|
+
def get_jobstore(self, db_url: str = f"sqlite:///{config.robot_data_folder}/db/jobs.sqlite"):
|
|
26
|
+
_log.info(f"Using persistent cron jobstore with database URL: {db_url}.")
|
|
27
27
|
return {"default": SQLAlchemyJobStore(url=db_url)}
|
|
28
28
|
|
|
29
29
|
class Job:
|
|
@@ -56,17 +56,18 @@ class Job:
|
|
|
56
56
|
|
|
57
57
|
class CronManager:
|
|
58
58
|
_list_default = [
|
|
59
|
-
Job('cleanup-task',
|
|
60
|
-
Job('cleanup-data',kb_cleanup_data_file, interval=
|
|
59
|
+
Job('cleanup-task-history',task_cleanup_history, interval=4 * 60 * 60),
|
|
60
|
+
Job('cleanup-kb-data',kb_cleanup_data_file, interval=8 * 60 * 60),
|
|
61
|
+
Job('cleanup-chat-attachment',chat_cleanup_attachment, interval=6 * 60 * 60),
|
|
61
62
|
]
|
|
62
|
-
def __get_jobstore_strategy() -> JobstoreStrategy:
|
|
63
|
-
if
|
|
63
|
+
def __get_jobstore_strategy(self) -> JobstoreStrategy:
|
|
64
|
+
if config.robot_cron_strategy == 'memory':
|
|
64
65
|
return MemoryJobstoreStrategy()
|
|
65
66
|
return PersistentJobstoreStrategy()
|
|
66
67
|
def __init__(self, strategy: JobstoreStrategy = None, enable_defaults: bool = True):
|
|
67
68
|
self.enable_defaults = enable_defaults
|
|
68
69
|
if strategy is None:
|
|
69
|
-
strategy =
|
|
70
|
+
strategy = self.__get_jobstore_strategy()
|
|
70
71
|
jobstores = strategy.get_jobstore()
|
|
71
72
|
self.scheduler: BackgroundScheduler = BackgroundScheduler(jobstores=jobstores)
|
|
72
73
|
self.__scheduler_is_running = False
|
|
@@ -139,22 +140,22 @@ class CronManager:
|
|
|
139
140
|
|
|
140
141
|
def execute_recurring_jobs(self):
|
|
141
142
|
for job in self.scheduler.get_jobs():
|
|
142
|
-
if job.interval:
|
|
143
|
-
job.
|
|
143
|
+
if job.trigger.interval:
|
|
144
|
+
job.func()
|
|
144
145
|
|
|
145
146
|
def pause_recurring_jobs(self):
|
|
146
147
|
for job in self.scheduler.get_jobs():
|
|
147
|
-
if job.interval:
|
|
148
|
+
if job.trigger.interval:
|
|
148
149
|
self.pause_job(job.id)
|
|
149
150
|
|
|
150
151
|
def resume_recurring_jobs(self):
|
|
151
152
|
for job in self.scheduler.get_jobs():
|
|
152
|
-
if job.interval:
|
|
153
|
+
if job.trigger.interval:
|
|
153
154
|
self.resume_job(job.id)
|
|
154
155
|
|
|
155
156
|
def remove_recurring_jobs(self):
|
|
156
157
|
for job in self.scheduler.get_jobs():
|
|
157
|
-
if job.interval:
|
|
158
|
+
if job.trigger.interval:
|
|
158
159
|
self.remove_job(job.id)
|
|
159
160
|
|
|
160
161
|
def clear(self):
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import uuid
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
from pydantic import AliasChoices, BaseModel, ConfigDict, Field
|
|
4
|
+
from typing import Optional
|
|
5
|
+
|
|
6
|
+
class AgentContext(BaseModel):
|
|
7
|
+
class _i18n(BaseModel):
|
|
8
|
+
lg: Optional[str] = "en"
|
|
9
|
+
country: Optional[str] = "US"
|
|
10
|
+
timestamp: Optional[str] = Field(default_factory=lambda: datetime.now().strftime("%Y-%m-%d %H:%M:%S %A"))
|
|
11
|
+
timezone: Optional[str] = "UTC"
|
|
12
|
+
model_config = ConfigDict(extra='allow')
|
|
13
|
+
class _user(BaseModel):
|
|
14
|
+
id: str = Field(default_factory=lambda: str(uuid.uuid4()))
|
|
15
|
+
first_name: Optional[str] = Field(None, validation_alias=AliasChoices("firstName","first_name"))
|
|
16
|
+
last_name: Optional[str] = Field(None, validation_alias=AliasChoices("lastName","last_name"))
|
|
17
|
+
country: Optional[str] = ''
|
|
18
|
+
email: Optional[str] = ''
|
|
19
|
+
phone: Optional[str] = ''
|
|
20
|
+
role: Optional[list] = [] #i.e. ["admin","user","guest"]
|
|
21
|
+
department: Optional[list] = [] #i.e. ["R&D","IT","HR"]
|
|
22
|
+
permission: Optional[list] = [] #i.e. ["read","write","delete","execute"]
|
|
23
|
+
model_config = ConfigDict(extra='allow')
|
|
24
|
+
i18n: _i18n = Field(default_factory=_i18n)
|
|
25
|
+
user: Optional[_user] =Field(default_factory=_user)
|
|
26
|
+
model_config = ConfigDict(extra='allow')
|
|
@@ -1,123 +1,123 @@
|
|
|
1
|
-
import json, requests, re
|
|
2
|
-
from typing import Any
|
|
3
|
-
from abc import ABC, abstractmethod
|
|
4
|
-
from langchain_core.prompts import ChatPromptTemplate
|
|
5
|
-
from langchain_core.messages import AIMessage
|
|
6
|
-
from langchain_core.runnables import RunnableSerializable
|
|
7
|
-
from langchain_core.runnables import RunnableLambda
|
|
8
|
-
from bs4 import BeautifulSoup
|
|
9
|
-
from ws_bom_robot_app.llm.models.api import LlmRules
|
|
10
|
-
from ws_bom_robot_app.llm.providers.llm_manager import LlmInterface
|
|
11
|
-
from ws_bom_robot_app.llm.utils.
|
|
12
|
-
|
|
13
|
-
# SafeDict helper class
|
|
14
|
-
class SafeDict(dict):
|
|
15
|
-
def __missing__(self, key):
|
|
16
|
-
return ''
|
|
17
|
-
|
|
18
|
-
# Strategy Interface
|
|
19
|
-
class AgentDescriptorStrategy(ABC):
|
|
20
|
-
@abstractmethod
|
|
21
|
-
def enrich_prompt(self, prompt: str, input: dict) -> str:
|
|
22
|
-
pass
|
|
23
|
-
|
|
24
|
-
@abstractmethod
|
|
25
|
-
def rule_input(self, input: dict) -> str:
|
|
26
|
-
pass
|
|
27
|
-
|
|
28
|
-
# Concrete Strategy for Default Agent
|
|
29
|
-
class DefaultAgentDescriptor(AgentDescriptorStrategy):
|
|
30
|
-
def enrich_prompt(self, prompt: str, input: dict) -> str:
|
|
31
|
-
# Default enrichment logic (could be minimal or no-op)
|
|
32
|
-
return prompt.format_map(SafeDict(input))
|
|
33
|
-
|
|
34
|
-
def rule_input(self, input: dict) -> str:
|
|
35
|
-
return input.get('content', "")
|
|
36
|
-
|
|
37
|
-
# Concrete Strategy for URL2Text Agent
|
|
38
|
-
class URL2TextAgentDescriptor(AgentDescriptorStrategy):
|
|
39
|
-
def enrich_prompt(self, prompt: str, input: dict) -> str:
|
|
40
|
-
input["context"] = self._get_page_text(input)
|
|
41
|
-
return prompt.format_map(SafeDict(input))
|
|
42
|
-
|
|
43
|
-
def rule_input(self, input: dict) -> str:
|
|
44
|
-
return input.get('context', "")
|
|
45
|
-
|
|
46
|
-
def _get_page_text(self, input: dict) -> str:
|
|
47
|
-
url = input.get("content", "")
|
|
48
|
-
exclusions = input.get("exclude", {})
|
|
49
|
-
response = requests.get(url)
|
|
50
|
-
response.raise_for_status()
|
|
51
|
-
soup = BeautifulSoup(response.content, 'html5lib')
|
|
52
|
-
classes_to_exclude = exclusions.get("classes", [])
|
|
53
|
-
ids_to_exclude = exclusions.get("ids", [])
|
|
54
|
-
for class_name in classes_to_exclude:
|
|
55
|
-
for element in soup.find_all(class_=class_name):
|
|
56
|
-
element.extract()
|
|
57
|
-
for id_name in ids_to_exclude:
|
|
58
|
-
for element in soup.find_all(id=id_name):
|
|
59
|
-
element.extract()
|
|
60
|
-
for script in soup(["script", "noscript", "style", "head", "footer", "iframe"]):
|
|
61
|
-
script.extract()
|
|
62
|
-
return re.sub(' +', ' ', soup.get_text())
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
class AgentDescriptor:
|
|
66
|
-
# Dictionary to hold all agent strategies
|
|
67
|
-
_list: dict[str,AgentDescriptorStrategy] = {
|
|
68
|
-
"default": DefaultAgentDescriptor(),
|
|
69
|
-
"url2text": URL2TextAgentDescriptor(),
|
|
70
|
-
}
|
|
71
|
-
|
|
72
|
-
# Functions to manage strategies
|
|
73
|
-
@staticmethod
|
|
74
|
-
def add_strategy(name: str, strategy: AgentDescriptorStrategy):
|
|
75
|
-
"""_summary_
|
|
76
|
-
add a new strategy to the dictionary
|
|
77
|
-
Args:
|
|
78
|
-
name (str): name of the strategy, in lowercase
|
|
79
|
-
strategy (AgentDescriptorStrategy): class implementing the strategy
|
|
80
|
-
Examples:
|
|
81
|
-
AgentDescriptor.add_strategy("custom_agent_descriptor", CustomAgentDescriptor())
|
|
82
|
-
"""
|
|
83
|
-
AgentDescriptor._list[name.lower()] = strategy
|
|
84
|
-
|
|
85
|
-
@staticmethod
|
|
86
|
-
def get_strategy(name: str) -> AgentDescriptorStrategy:
|
|
87
|
-
return AgentDescriptor._list.get(name.lower(), DefaultAgentDescriptor())
|
|
88
|
-
|
|
89
|
-
def __init__(self, llm: LlmInterface, prompt: str, mode: str, rules: LlmRules = None):
|
|
90
|
-
self.__prompt = prompt
|
|
91
|
-
self.__llm = llm
|
|
92
|
-
self.rules= rules
|
|
93
|
-
self.strategy = self.get_strategy(mode) # Selects the strategy from the dictionary
|
|
94
|
-
|
|
95
|
-
async def __create_prompt(self, input_dict: dict):
|
|
96
|
-
input_data = json.loads(input_dict.get("input", {}))
|
|
97
|
-
system = self.strategy.enrich_prompt(self.__prompt, input_data)
|
|
98
|
-
if self.rules:
|
|
99
|
-
rule_input = self.strategy.rule_input(input_data)
|
|
100
|
-
rules_prompt = await get_rules(self.__llm.get_embeddings(), self.rules, rule_input)
|
|
101
|
-
system += rules_prompt
|
|
102
|
-
return ChatPromptTemplate.from_messages(
|
|
103
|
-
[
|
|
104
|
-
("system", system),
|
|
105
|
-
("user", input_data.get("content", ""))
|
|
106
|
-
]
|
|
107
|
-
)
|
|
108
|
-
|
|
109
|
-
def __create_agent_descriptor(self, content) -> RunnableSerializable[Any, Any]:
|
|
110
|
-
content = json.loads(content)
|
|
111
|
-
agent = (
|
|
112
|
-
{
|
|
113
|
-
"input": lambda x: x["input"],
|
|
114
|
-
}
|
|
115
|
-
| RunnableLambda(self.__create_prompt)
|
|
116
|
-
| self.__llm.get_llm()
|
|
117
|
-
)
|
|
118
|
-
return agent
|
|
119
|
-
|
|
120
|
-
async def run_agent(self, content) -> Any:
|
|
121
|
-
agent_descriptor = self.__create_agent_descriptor(content)
|
|
122
|
-
response: AIMessage = await agent_descriptor.ainvoke({"input": content})
|
|
123
|
-
return response
|
|
1
|
+
import json, requests, re
|
|
2
|
+
from typing import Any
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
from langchain_core.prompts import ChatPromptTemplate
|
|
5
|
+
from langchain_core.messages import AIMessage
|
|
6
|
+
from langchain_core.runnables import RunnableSerializable
|
|
7
|
+
from langchain_core.runnables import RunnableLambda
|
|
8
|
+
from bs4 import BeautifulSoup
|
|
9
|
+
from ws_bom_robot_app.llm.models.api import LlmRules
|
|
10
|
+
from ws_bom_robot_app.llm.providers.llm_manager import LlmInterface
|
|
11
|
+
from ws_bom_robot_app.llm.utils.agent import get_rules
|
|
12
|
+
|
|
13
|
+
# SafeDict helper class
|
|
14
|
+
class SafeDict(dict):
|
|
15
|
+
def __missing__(self, key):
|
|
16
|
+
return ''
|
|
17
|
+
|
|
18
|
+
# Strategy Interface
|
|
19
|
+
class AgentDescriptorStrategy(ABC):
|
|
20
|
+
@abstractmethod
|
|
21
|
+
def enrich_prompt(self, prompt: str, input: dict) -> str:
|
|
22
|
+
pass
|
|
23
|
+
|
|
24
|
+
@abstractmethod
|
|
25
|
+
def rule_input(self, input: dict) -> str:
|
|
26
|
+
pass
|
|
27
|
+
|
|
28
|
+
# Concrete Strategy for Default Agent
|
|
29
|
+
class DefaultAgentDescriptor(AgentDescriptorStrategy):
|
|
30
|
+
def enrich_prompt(self, prompt: str, input: dict) -> str:
|
|
31
|
+
# Default enrichment logic (could be minimal or no-op)
|
|
32
|
+
return prompt.format_map(SafeDict(input))
|
|
33
|
+
|
|
34
|
+
def rule_input(self, input: dict) -> str:
|
|
35
|
+
return input.get('content', "")
|
|
36
|
+
|
|
37
|
+
# Concrete Strategy for URL2Text Agent
|
|
38
|
+
class URL2TextAgentDescriptor(AgentDescriptorStrategy):
|
|
39
|
+
def enrich_prompt(self, prompt: str, input: dict) -> str:
|
|
40
|
+
input["context"] = self._get_page_text(input)
|
|
41
|
+
return prompt.format_map(SafeDict(input))
|
|
42
|
+
|
|
43
|
+
def rule_input(self, input: dict) -> str:
|
|
44
|
+
return input.get('context', "")
|
|
45
|
+
|
|
46
|
+
def _get_page_text(self, input: dict) -> str:
|
|
47
|
+
url = input.get("content", "")
|
|
48
|
+
exclusions = input.get("exclude", {})
|
|
49
|
+
response = requests.get(url)
|
|
50
|
+
response.raise_for_status()
|
|
51
|
+
soup = BeautifulSoup(response.content, 'html5lib')
|
|
52
|
+
classes_to_exclude = exclusions.get("classes", [])
|
|
53
|
+
ids_to_exclude = exclusions.get("ids", [])
|
|
54
|
+
for class_name in classes_to_exclude:
|
|
55
|
+
for element in soup.find_all(class_=class_name):
|
|
56
|
+
element.extract()
|
|
57
|
+
for id_name in ids_to_exclude:
|
|
58
|
+
for element in soup.find_all(id=id_name):
|
|
59
|
+
element.extract()
|
|
60
|
+
for script in soup(["script", "noscript", "style", "head", "footer", "iframe"]):
|
|
61
|
+
script.extract()
|
|
62
|
+
return re.sub(' +', ' ', soup.get_text())
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class AgentDescriptor:
|
|
66
|
+
# Dictionary to hold all agent strategies
|
|
67
|
+
_list: dict[str,AgentDescriptorStrategy] = {
|
|
68
|
+
"default": DefaultAgentDescriptor(),
|
|
69
|
+
"url2text": URL2TextAgentDescriptor(),
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
# Functions to manage strategies
|
|
73
|
+
@staticmethod
|
|
74
|
+
def add_strategy(name: str, strategy: AgentDescriptorStrategy):
|
|
75
|
+
"""_summary_
|
|
76
|
+
add a new strategy to the dictionary
|
|
77
|
+
Args:
|
|
78
|
+
name (str): name of the strategy, in lowercase
|
|
79
|
+
strategy (AgentDescriptorStrategy): class implementing the strategy
|
|
80
|
+
Examples:
|
|
81
|
+
AgentDescriptor.add_strategy("custom_agent_descriptor", CustomAgentDescriptor())
|
|
82
|
+
"""
|
|
83
|
+
AgentDescriptor._list[name.lower()] = strategy
|
|
84
|
+
|
|
85
|
+
@staticmethod
|
|
86
|
+
def get_strategy(name: str) -> AgentDescriptorStrategy:
|
|
87
|
+
return AgentDescriptor._list.get(name.lower(), DefaultAgentDescriptor())
|
|
88
|
+
|
|
89
|
+
def __init__(self, llm: LlmInterface, prompt: str, mode: str, rules: LlmRules = None):
|
|
90
|
+
self.__prompt = prompt
|
|
91
|
+
self.__llm = llm
|
|
92
|
+
self.rules= rules
|
|
93
|
+
self.strategy = self.get_strategy(mode) # Selects the strategy from the dictionary
|
|
94
|
+
|
|
95
|
+
async def __create_prompt(self, input_dict: dict):
|
|
96
|
+
input_data = json.loads(input_dict.get("input", {}))
|
|
97
|
+
system = self.strategy.enrich_prompt(self.__prompt, input_data)
|
|
98
|
+
if self.rules:
|
|
99
|
+
rule_input = self.strategy.rule_input(input_data)
|
|
100
|
+
rules_prompt = await get_rules(self.__llm.get_embeddings(), self.rules, rule_input)
|
|
101
|
+
system += rules_prompt
|
|
102
|
+
return ChatPromptTemplate.from_messages(
|
|
103
|
+
[
|
|
104
|
+
("system", system),
|
|
105
|
+
("user", input_data.get("content", ""))
|
|
106
|
+
]
|
|
107
|
+
)
|
|
108
|
+
|
|
109
|
+
def __create_agent_descriptor(self, content) -> RunnableSerializable[Any, Any]:
|
|
110
|
+
content = json.loads(content)
|
|
111
|
+
agent = (
|
|
112
|
+
{
|
|
113
|
+
"input": lambda x: x["input"],
|
|
114
|
+
}
|
|
115
|
+
| RunnableLambda(self.__create_prompt)
|
|
116
|
+
| self.__llm.get_llm()
|
|
117
|
+
)
|
|
118
|
+
return agent
|
|
119
|
+
|
|
120
|
+
async def run_agent(self, content) -> Any:
|
|
121
|
+
agent_descriptor = self.__create_agent_descriptor(content)
|
|
122
|
+
response: AIMessage = await agent_descriptor.ainvoke({"input": content})
|
|
123
|
+
return response
|