ws-bom-robot-app 0.0.37__py3-none-any.whl → 0.0.103__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ws_bom_robot_app/config.py +35 -7
- ws_bom_robot_app/cron_manager.py +15 -14
- ws_bom_robot_app/llm/agent_context.py +26 -0
- ws_bom_robot_app/llm/agent_description.py +123 -123
- ws_bom_robot_app/llm/agent_handler.py +176 -180
- ws_bom_robot_app/llm/agent_lcel.py +107 -54
- ws_bom_robot_app/llm/api.py +100 -7
- ws_bom_robot_app/llm/defaut_prompt.py +15 -15
- ws_bom_robot_app/llm/evaluator.py +319 -0
- ws_bom_robot_app/llm/feedbacks/__init__.py +0 -0
- ws_bom_robot_app/llm/feedbacks/feedback_manager.py +66 -0
- ws_bom_robot_app/llm/main.py +159 -110
- ws_bom_robot_app/llm/models/api.py +70 -5
- ws_bom_robot_app/llm/models/feedback.py +30 -0
- ws_bom_robot_app/llm/nebuly_handler.py +185 -0
- ws_bom_robot_app/llm/providers/llm_manager.py +244 -80
- ws_bom_robot_app/llm/tools/models/main.py +8 -0
- ws_bom_robot_app/llm/tools/tool_builder.py +68 -23
- ws_bom_robot_app/llm/tools/tool_manager.py +343 -133
- ws_bom_robot_app/llm/tools/utils.py +41 -25
- ws_bom_robot_app/llm/utils/agent.py +34 -0
- ws_bom_robot_app/llm/utils/chunker.py +6 -1
- ws_bom_robot_app/llm/utils/cleanup.py +81 -0
- ws_bom_robot_app/llm/utils/cms.py +123 -0
- ws_bom_robot_app/llm/utils/download.py +183 -79
- ws_bom_robot_app/llm/utils/print.py +29 -29
- ws_bom_robot_app/llm/vector_store/db/__init__.py +0 -0
- ws_bom_robot_app/llm/vector_store/db/base.py +193 -0
- ws_bom_robot_app/llm/vector_store/db/chroma.py +97 -0
- ws_bom_robot_app/llm/vector_store/db/faiss.py +91 -0
- ws_bom_robot_app/llm/vector_store/db/manager.py +15 -0
- ws_bom_robot_app/llm/vector_store/db/qdrant.py +73 -0
- ws_bom_robot_app/llm/vector_store/generator.py +137 -137
- ws_bom_robot_app/llm/vector_store/integration/api.py +216 -0
- ws_bom_robot_app/llm/vector_store/integration/azure.py +1 -1
- ws_bom_robot_app/llm/vector_store/integration/base.py +58 -15
- ws_bom_robot_app/llm/vector_store/integration/confluence.py +41 -11
- ws_bom_robot_app/llm/vector_store/integration/dropbox.py +1 -1
- ws_bom_robot_app/llm/vector_store/integration/gcs.py +1 -1
- ws_bom_robot_app/llm/vector_store/integration/github.py +22 -22
- ws_bom_robot_app/llm/vector_store/integration/googledrive.py +46 -17
- ws_bom_robot_app/llm/vector_store/integration/jira.py +112 -75
- ws_bom_robot_app/llm/vector_store/integration/manager.py +6 -2
- ws_bom_robot_app/llm/vector_store/integration/s3.py +1 -1
- ws_bom_robot_app/llm/vector_store/integration/sftp.py +1 -1
- ws_bom_robot_app/llm/vector_store/integration/sharepoint.py +7 -14
- ws_bom_robot_app/llm/vector_store/integration/shopify.py +143 -0
- ws_bom_robot_app/llm/vector_store/integration/sitemap.py +9 -1
- ws_bom_robot_app/llm/vector_store/integration/slack.py +3 -2
- ws_bom_robot_app/llm/vector_store/integration/thron.py +236 -0
- ws_bom_robot_app/llm/vector_store/loader/base.py +52 -8
- ws_bom_robot_app/llm/vector_store/loader/docling.py +71 -33
- ws_bom_robot_app/llm/vector_store/loader/json_loader.py +25 -25
- ws_bom_robot_app/main.py +148 -146
- ws_bom_robot_app/subprocess_runner.py +106 -0
- ws_bom_robot_app/task_manager.py +207 -54
- ws_bom_robot_app/util.py +65 -20
- ws_bom_robot_app-0.0.103.dist-info/METADATA +364 -0
- ws_bom_robot_app-0.0.103.dist-info/RECORD +76 -0
- {ws_bom_robot_app-0.0.37.dist-info → ws_bom_robot_app-0.0.103.dist-info}/WHEEL +1 -1
- ws_bom_robot_app/llm/settings.py +0 -4
- ws_bom_robot_app/llm/utils/agent_utils.py +0 -17
- ws_bom_robot_app/llm/utils/kb.py +0 -34
- ws_bom_robot_app-0.0.37.dist-info/METADATA +0 -277
- ws_bom_robot_app-0.0.37.dist-info/RECORD +0 -60
- {ws_bom_robot_app-0.0.37.dist-info → ws_bom_robot_app-0.0.103.dist-info}/top_level.txt +0 -0
ws_bom_robot_app/llm/main.py
CHANGED
@@ -1,110 +1,159 @@ — the removed lines are truncated in the source diff; the new file content follows:

```python
from asyncio import Queue
import asyncio, json, logging, os, traceback, re
from fastapi import Request
from langchain.callbacks.tracers import LangChainTracer
from langchain_core.callbacks.base import AsyncCallbackHandler
from langchain_core.messages import BaseMessage, AIMessage, HumanMessage
from langsmith import Client as LangSmithClient
from typing import AsyncGenerator, List
from ws_bom_robot_app.config import config
from ws_bom_robot_app.llm.agent_description import AgentDescriptor
from ws_bom_robot_app.llm.agent_handler import AgentHandler, RawAgentHandler
from ws_bom_robot_app.llm.agent_lcel import AgentLcel
from ws_bom_robot_app.llm.models.api import InvokeRequest, StreamRequest
from ws_bom_robot_app.llm.providers.llm_manager import LlmInterface
from ws_bom_robot_app.llm.tools.tool_builder import get_structured_tools
from ws_bom_robot_app.llm.nebuly_handler import NebulyHandler

async def invoke(rq: InvokeRequest) -> str:
  await rq.initialize()
  _msg: str = rq.messages[-1].content
  processor = AgentDescriptor(
    llm=rq.get_llm(),
    prompt=rq.system_message,
    mode = rq.mode,
    rules=rq.rules if rq.rules else None
  )
  result: AIMessage = await processor.run_agent(_msg)
  return {"result": result.content}

def _parse_formatted_message(message: str) -> str:
  try:
    text_fragments = []
    quoted_strings = re.findall(r'"([^"\\]*(?:\\.[^"\\]*)*)"', message)
    for string in quoted_strings:
      if not string.startswith(('threadId', 'type')) and len(string) > 1:
        text_fragments.append(string)
    result = ''.join(text_fragments)
    result = result.replace('\\n', '\n')
  except:
    result = message
  return result

async def __stream(rq: StreamRequest, ctx: Request, queue: Queue, formatted: bool = True) -> None:
  #os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

  # rq initialization
  await rq.initialize()
  for tool in rq.app_tools:
    tool.thread_id = rq.thread_id

  #llm
  __llm: LlmInterface = rq.get_llm()

  #chat history
  chat_history: list[BaseMessage] = []
  for message in rq.messages:
    if message.role in ["human","user"]:
      _content = message.content
      # multimodal content parsing
      if isinstance(_content, list):
        try:
          _content = await __llm.format_multimodal_content(_content)
        except Exception as e:
          logging.warning(f"Error parsing multimodal content {_content[:100]}: {e}")
      chat_history.append(HumanMessage(content=_content))
    elif message.role in ["ai","assistant"]:
      message_content = ""
      if formatted:
        if '{\"type\":\"string\"' in message.content:
          try:
            json_msg = json.loads('[' + message.content[:-1] + ']')
            for msg in json_msg:
              if msg.get("content"):
                message_content += msg["content"]
          except:
            message_content = _parse_formatted_message(message.content)
        elif '{\"type\":\"text\"' in message.content:
          try:
            json_msg = json.loads('[' + message.content[:-1] + ']')
            for msg in json_msg:
              if msg.get("text"):
                message_content += msg["text"]
          except:
            message_content = _parse_formatted_message(message.content)
        else:
          message_content = _parse_formatted_message(message.content)
      else:
        message_content = message.content
      if message_content:
        chat_history.append(AIMessage(content=message_content))

  #agent handler
  if formatted:
    agent_handler = AgentHandler(queue, rq.provider, rq.thread_id)
  else:
    agent_handler = RawAgentHandler(queue, rq.provider)
  #TODO: move from os.environ to rq
  os.environ["AGENT_HANDLER_FORMATTED"] = str(formatted)

  #callbacks
  ## agent
  callbacks: List[AsyncCallbackHandler] = [agent_handler]
  ## langchain tracing
  if rq.lang_chain_tracing:
    client = LangSmithClient(
      api_key=rq.secrets.get("langChainApiKey", "")
    )
    trace = LangChainTracer(project_name=rq.lang_chain_project, client=client, tags=[str(ctx.base_url) if ctx else ''])
    callbacks.append(trace)
  ## nebuly tracing
  if rq.secrets.get("nebulyApiKey","") != "":
    user_id = rq.system_context.user.id if rq.system_context and rq.system_context.user and rq.system_context.user.id else None
    nebuly_callback = NebulyHandler(
      llm_model=__llm.config.model,
      threadId=rq.thread_id,
      chat_history=chat_history,
      url=config.NEBULY_API_URL,
      api_key=rq.secrets.get("nebulyApiKey", None),
      user_id=user_id
    )
    callbacks.append(nebuly_callback)

  # chain
  processor = AgentLcel(
    llm=__llm,
    sys_message=rq.system_message,
    sys_context=rq.system_context,
    tools=get_structured_tools(__llm, tools=rq.app_tools, callbacks=[callbacks], queue=queue),
    rules=rq.rules,
    json_schema=rq.output_structure.get("outputFormat") if rq.output_structure and rq.output_structure.get("outputType") == "json" else None
  )
  try:
    await processor.executor.ainvoke(
      {"chat_history": chat_history},
      {"callbacks": callbacks},
    )
  except Exception as e:
    _error = f"Agent invoke ex: {e}"
    logging.warning(_error)
    if config.runtime_options().debug:
      _error += f" | {traceback.format_exc()}"
    await queue.put(_error)
    await queue.put(None)

  # signal the end of streaming
  await queue.put(None)

async def stream(rq: StreamRequest, ctx: Request, formatted: bool = True) -> AsyncGenerator[str, None]:
  queue = Queue()
  task = asyncio.create_task(__stream(rq, ctx, queue, formatted))
  try:
    while True:
      token = await queue.get()
      if token is None:  # None indicates the end of streaming
        break
      yield token
  finally:
    await task
```
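The new `stream()` entry point is an async generator: `__stream` runs as a background task feeding an `asyncio.Queue`, and `stream()` drains it until the `None` sentinel. A minimal sketch of how it might be exposed over HTTP; the route path, media type, and wiring are illustrative assumptions, not taken from the package's `api.py`:

```python
# Sketch only: route name and media type are assumptions, not from the package.
from fastapi import FastAPI, Request
from fastapi.responses import StreamingResponse
from ws_bom_robot_app.llm.main import stream
from ws_bom_robot_app.llm.models.api import StreamRequest

app = FastAPI()

@app.post("/llm/stream")
async def llm_stream(rq: StreamRequest, ctx: Request):
    # stream() yields tokens pushed onto the internal queue by the agent
    # callback handler, stopping at the terminating None sentinel.
    return StreamingResponse(stream(rq, ctx), media_type="text/event-stream")
```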
ws_bom_robot_app/llm/models/api.py
CHANGED

````diff
@@ -1,8 +1,9 @@
-from typing import List, Dict, Optional, Tuple, Union
+from typing import List, Dict, Optional, Tuple, Union, Any
 from datetime import datetime
 from pydantic import AliasChoices, BaseModel, Field, ConfigDict
 from langchain_core.embeddings import Embeddings
 from langchain.chains.query_constructor.schema import AttributeInfo
+from ws_bom_robot_app.llm.agent_context import AgentContext
 from ws_bom_robot_app.llm.models.kb import LlmKbEndpoint, LlmKbIntegration
 from ws_bom_robot_app.llm.providers.llm_manager import LlmManager, LlmConfig, LlmInterface
 from ws_bom_robot_app.llm.utils.download import download_file
@@ -10,6 +11,39 @@ import os, shutil, uuid
 from ws_bom_robot_app.config import Settings, config
 
 class LlmMessage(BaseModel):
+  """
+  💬 multimodal chat
+
+  The multimodal message allows users to interact with the application using both text and media files.
+  `robot` accept multimodal input in a uniform way, regarding the llm provider used.
+
+  - simple message
+
+  ```json
+  {
+    "role": "user",
+    "content": "What is the capital of France?"
+  }
+  ```
+
+  - multimodal message
+
+  ```jsonc
+  {
+    "role": "user",
+    "content": [
+      { "type": "text", "text": "Read carefully all the attachments, analize the content and provide a summary for each one:" },
+      { "type": "image", "url": "https://www.example.com/image/foo.jpg" },
+      { "type": "file", "url": "https://www.example.com/pdf/bar.pdf" },
+      { "type": "file", "url": "data:plain/text;base64,CiAgICAgIF9fX19fCiAgICAgLyAgIC..." }, // base64 encoded file
+      { "type": "media", "mime_type": "plain/text", "data": "CiAgICAgIF9fX19fCiAgICAgLyAgIC..." } // google/gemini specific input format
+    ]
+  }
+  ```
+
+  > 💡 `url` can be a remote url or a base64 representation of the file: [rfc 2397](https://datatracker.ietf.org/doc/html/rfc2397).
+  Can also be used the llm/model specific input format.
+  """
   role: str
   content: Union[str, list]
 
@@ -28,12 +62,14 @@ class LlmAppToolChainSettings(BaseModel):
   provider: Optional[str] = "openai"
   model: Optional[str] = None
   temperature: Optional[float] = 0
+  outputStructure: Optional[dict] = None
 
 class LlmAppToolDbSettings(BaseModel):
   connection_string: Optional[str] = Field(None, validation_alias=AliasChoices("connectionString","connection_string"))
 
 class LlmAppTool(BaseModel):
   id: Optional[str] = None
+  thread_id: Optional[str] = Field(None, validation_alias=AliasChoices("threadId","thread_id"))
   name: str
   description: Optional[str] = None
   type: str
@@ -51,6 +87,11 @@ class LlmAppTool(BaseModel):
   vector_type: Optional[str] = Field('faiss', validation_alias=AliasChoices("vectorDbType","vector_type"))
   vector_db: Optional[str] = Field(None, validation_alias=AliasChoices("vectorDbFile","vector_db"))
   is_active: Optional[bool] = Field(True, validation_alias=AliasChoices("isActive","is_active"))
+  def secrets_to_dict(self) -> Dict[str, str]:
+    _secrets = {}
+    for d in self.secrets or []:
+      _secrets[d.get("secretId")] = d.get("secretValue")
+    return _secrets
   def get_vector_filtering(self) -> Optional[Tuple[str, List[AttributeInfo]]]:
     _description = None
     _metadata = None
@@ -74,11 +115,33 @@ class LlmAppTool(BaseModel):
     extra='allow'
   )
 
+class NebulyInteraction(BaseModel):
+  conversation_id: str = Field(..., description="Unique identifier for grouping related interactions")
+  input: str = Field(..., description="User input text in the interaction")
+  output: str = Field(..., description="LLM response shown to the user")
+  time_start: str = Field(..., description="ISO 8601 formatted start time of the LLM call")
+  time_end: str = Field(..., description="ISO 8601 formatted end time of the LLM call")
+  end_user: str = Field(..., description="Unique identifier for the end user (recommended: hashed username/email or thread_id)")
+  tags: Optional[Dict[str, str]] = Field(default=None, description="Custom key-value pairs for tagging interactions")
+
+class NebulyLLMTrace(BaseModel):
+  model: str = Field(..., description="The name of the LLM model used for the interaction")
+  messages: List[LlmMessage] = Field(..., description="List of messages exchanged during the interaction")
+  output: str = Field(..., description="The final output generated by the LLM")
+  input_tokens: Optional[int] = Field(..., description="Number of tokens in the input messages")
+  output_tokens: Optional[int] = Field(..., description="Number of tokens in the output message")
+
+class NebulyRetrievalTrace(BaseModel):
+  source: Union[str, None] = Field(..., description="The source of the retrieved documents")
+  input: str = Field(..., description="The input query used for retrieval")
+  outputs: List[str] = Field(..., description="List of retrieved document contents")
+
 #region llm public endpoints
 
 #region api
 class LlmApp(BaseModel):
   system_message: str = Field(..., validation_alias=AliasChoices("systemMessage","system_message"))
+  system_context: Optional[AgentContext] = Field(AgentContext(), validation_alias=AliasChoices("systemContext","system_context"))
   messages: List[LlmMessage]
   provider: Optional[str] = "openai"
   model: Optional[str] = None
@@ -91,6 +154,7 @@ class LlmApp(BaseModel):
   fine_tuned_model: Optional[str] = Field(None, validation_alias=AliasChoices("fineTunedModel","fine_tuned_model"))
   lang_chain_tracing: Optional[bool] = Field(False, validation_alias=AliasChoices("langChainTracing","lang_chain_tracing"))
   lang_chain_project: Optional[str] = Field(None, validation_alias=AliasChoices("langChainProject","lang_chain_project"))
+  output_structure: Optional[Dict[str, Any]] = Field(None, validation_alias=AliasChoices("outputStructure","output_structure"))
   model_config = ConfigDict(
     extra='allow'
   )
@@ -100,7 +164,7 @@ class LlmApp(BaseModel):
     return list(set(
       os.path.basename(db) for db in [self.vector_db] +
      ([self.rules.vector_db] if self.rules and self.rules.vector_db else []) +
-      [db for tool in (self.app_tools or []) for db in [tool.vector_db]]
+      [db for tool in (self.app_tools or []) for db in [tool.vector_db] if tool.is_active]
      if db is not None
    ))
   def __decompress_zip(self,zip_file_path, extract_to):
@@ -124,7 +188,7 @@ class LlmApp(BaseModel):
     for tool in self.app_tools or []:
       tool.vector_db = os.path.join(_vector_db_folder, os.path.splitext(os.path.basename(tool.vector_db))[0]) if tool.vector_db else None
   def api_key(self):
-    return self.secrets.get("
+    return self.secrets.get("apiKey", "")
   def get_llm(self) -> LlmInterface:
     return LlmManager._list[self.provider](LlmConfig(
       api_key=self.api_key(),
@@ -139,7 +203,8 @@ class InvokeRequest(LlmApp):
   mode: str
 
 class StreamRequest(LlmApp):
-  thread_id: Optional[str] = Field(
+  thread_id: Optional[str] = Field(default=str(uuid.uuid4()), validation_alias=AliasChoices("threadId","thread_id"))
+  msg_id: Optional[str] = Field(default=str(uuid.uuid4()), validation_alias=AliasChoices("msgId","msg_id"))
 #endregion
 
 #region vector_db
@@ -159,7 +224,7 @@ class VectorDbRequest(BaseModel):
   def config(self) -> Settings:
     return config
   def api_key(self):
-    return self.secrets.get("
+    return self.secrets.get("apiKey", "")
   def out_name(self):
     if self.vector_db:
       return ".".join(self.vector_db.split(".")[:-1]) if self.vector_db.endswith(".zip") else self.vector_db
````
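Putting the new request fields together, a `StreamRequest` body might look like the sketch below. The camelCase keys come from the `AliasChoices` declarations above; the values and the JSON-schema contents are illustrative, and `main.py` only forwards `outputFormat` to the chain when `outputType` is `"json"`:

```python
# Illustrative request body; all values are made up.
stream_request_body = {
    "systemMessage": "You are a helpful assistant.",
    "threadId": "7f9c0e9a-0000-0000-0000-000000000000",
    "messages": [
        {"role": "user", "content": "What is the capital of France?"},
    ],
    "outputStructure": {
        "outputType": "json",  # main.py reads outputFormat only for this type
        "outputFormat": {"type": "object", "properties": {"answer": {"type": "string"}}},
    },
}
```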
ws_bom_robot_app/llm/models/feedback.py
ADDED

```python
from pydantic import BaseModel, Field

class NebulyFeedbackAction(BaseModel):
  """
  FeedbackAction is a model that represents the action taken by the user
  in response to the feedback provided by the LLM.
  """
  slug: str = Field("rating", description="A string identifier for the feedback action",
                    enum=["thumbs_up", "thumbs_down", "copy_input", "copy_output", "paste", "rating"])
  text: str = Field(..., description="The text content of the feedback")
  value: int = Field(..., description="A numeric value associated with the feedback")

class NebulyFeedbackMetadata(BaseModel):
  """
  FeedbackMetadata is a model that represents the metadata associated with user feedback.
  This includes information about the interaction and the user who provided feedback.
  """
  input: str = Field(None, description="The input of the interactions to which the action refers to")
  output: str = Field(None, description="The output of the interactions to which the action refers to")
  end_user: str = Field(..., description="The identifier used for the end-user")
  timestamp: str = Field(..., description="The timestamp of the action event")
  anonymize: bool = Field(False, description="Boolean flag. If set to true, PII will be removed from the text field")

class NebulyFeedbackPayload(BaseModel):
  """
  NebulyFeedback is a model that combines feedback action and metadata.
  It represents a complete feedback entry from a user interaction with the LLM.
  """
  action: NebulyFeedbackAction = Field(..., description="The action taken by the user as feedback")
  metadata: NebulyFeedbackMetadata = Field(..., description="Metadata associated with the feedback")
```
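A minimal sketch of composing these models into a feedback entry; the values are illustrative, the import path assumes the `ws_bom_robot_app/llm/models/feedback.py` module listed above, and `model_dump()` assumes pydantic v2 (consistent with the `ConfigDict` usage elsewhere in the package):

```python
from datetime import datetime, timezone
from ws_bom_robot_app.llm.models.feedback import (
    NebulyFeedbackAction,
    NebulyFeedbackMetadata,
    NebulyFeedbackPayload,
)

# Illustrative thumbs-up entry; field values are made up.
payload = NebulyFeedbackPayload(
    action=NebulyFeedbackAction(slug="thumbs_up", text="Accurate answer", value=1),
    metadata=NebulyFeedbackMetadata(
        input="What is the capital of France?",
        output="Paris",
        end_user="hashed-user-id",
        timestamp=datetime.now(timezone.utc).isoformat(),
        anonymize=False,
    ),
)
feedback_dict = payload.model_dump()  # plain dict representation of the feedback entry
```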
ws_bom_robot_app/llm/nebuly_handler.py
ADDED

```python
from typing import Union
from ws_bom_robot_app.llm.models.api import NebulyInteraction, NebulyLLMTrace, NebulyRetrievalTrace
from datetime import datetime, timezone
from langchain_core.callbacks.base import AsyncCallbackHandler
from langchain_core.messages import BaseMessage, HumanMessage, AIMessage
from langchain_core.outputs import ChatGenerationChunk, GenerationChunk

class NebulyHandler(AsyncCallbackHandler):
  def __init__(self, llm_model: str | None, threadId: str = None, chat_history: list[BaseMessage] = [], url: str = None, api_key: str = None, user_id: str | None = None):
    super().__init__()
    self.__started: bool = False
    self.__url: str = url
    self.__api_key: str = api_key
    self.chat_history = chat_history
    self.interaction = NebulyInteraction(
      conversation_id=threadId,
      input="",
      output="",
      time_start="",
      time_end="",
      end_user=user_id if user_id and user_id != "" else threadId,
      tags={"model": llm_model},
    )
    self.llm_trace = NebulyLLMTrace(
      model=llm_model,
      messages=[],
      output="",
      input_tokens=0,
      output_tokens=0,
    )
    self.__response_with_rag: str = "false"  # Flag to check if the AI used some retrieval tools
    self.__retrieval_query: str = ""
    self.retrieval_traces: list[NebulyRetrievalTrace] = []

  async def on_chat_model_start(self, serialized, messages, *, run_id, parent_run_id = None, tags = None, metadata = None, **kwargs):
    # Initialize the interaction with the input message
    if not self.__started:
      message_list = self.__flat_messages(messages)
      if isinstance(message_list[-1], HumanMessage):
        if isinstance(message_list[-1].content, list):
          self.interaction.input = self.__parse_multimodal_input(message_list[-1].content)
        else:
          self.interaction.input = message_list[-1].content
        self.interaction.tags["generated"] = self.__is_message_generated(message_list)
      else:
        raise ValueError("Last message is not a HumanMessage")
      self.interaction.time_start = datetime.now().astimezone().isoformat()
      self.__started = True

  async def on_llm_end(self, response, *, run_id, parent_run_id = None, tags = None, **kwargs):
    generation: Union[ChatGenerationChunk, GenerationChunk] = response.generations[0]
    usage_metadata: dict = generation[0].message.usage_metadata
    self.llm_trace.input_tokens = usage_metadata.get("input_tokens", 0)
    self.llm_trace.output_tokens = usage_metadata.get("output_tokens", 0)

  async def on_retriever_start(self, serialized, query, *, run_id, parent_run_id = None, tags = None, metadata = None, **kwargs):
    self.__retrieval_query = query

  async def on_retriever_end(self, documents, *, run_id, parent_run_id = None, tags = None, **kwargs):
    # pass the document source because of the large amount of data in the document content
    for doc in documents:
      self.retrieval_traces.append(
        NebulyRetrievalTrace(
          source=doc.metadata.get("source", "content unavailable"),
          input=self.__retrieval_query,
          outputs=[doc.metadata.get("source", "content unavailable")]
        )
      )

  async def on_tool_start(self, serialized, input_str, *, run_id, parent_run_id = None, tags = None, metadata = None, inputs = None, **kwargs):
    self.__response_with_rag = "true"  # Set the flag to true when the retriever starts

  async def on_agent_finish(self, finish, *, run_id, parent_run_id = None, tags = None, **kwargs):
    # Interaction
    self.interaction.output = finish.return_values["output"]
    # Trace
    self.llm_trace.output = finish.return_values["output"]
    message_history = self._convert_to_json_format(self.chat_history)
    self.llm_trace.messages = self.__parse_multimodal_history(message_history)
    await self.__send_interaction()

  def __flat_messages(self, messages: list[list[BaseMessage]], to_json: bool = False) -> list[BaseMessage]:
    """
    Maps the messages to the format expected by the LLM.
    Flattens the nested list structure of messages.
    """
    # Flatten the nested list structure
    flattened_messages = []
    for message_list in messages:
      flattened_messages.extend(message_list)
    # Store JSON format in LLM trace
    if to_json:
      return self._convert_to_json_format(flattened_messages)
    return flattened_messages

  def _convert_to_json_format(self, messages: list[BaseMessage]) -> list[dict]:
    """Converts BaseMessage objects to JSON format with role and content."""
    result = []
    for message in messages:
      if isinstance(message, HumanMessage):
        role = "user"
      elif isinstance(message, AIMessage):
        role = "assistant"
      else:
        role = "system"

      result.append({
        "role": role,
        "content": message.content
      })
    return result

  async def __send_interaction(self):
    # Send the interaction to the server
    from urllib.parse import urljoin
    import requests

    payload = self.__prepare_payload()
    endpoint = urljoin(self.__url, "event-ingestion/api/v2/events/trace_interaction")
    # Prepare headers with authentication
    headers = {"Content-Type": "application/json"}
    if self.__api_key:
      headers["Authorization"] = f"Bearer {self.__api_key}"
    response = requests.post(
      url=endpoint,
      json=payload,
      headers=headers
    )
    if response.status_code != 200:
      print(f"Failed to send interaction: {response.status_code} {response.text}")

  def __prepare_payload(self):
    self.interaction.time_end = datetime.now().astimezone().isoformat()
    self.interaction.tags["response_with_rag"] = self.__response_with_rag
    payload = {
      "interaction": self.interaction.__dict__,
      "traces": [
        self.llm_trace.__dict__,
      ]
    }
    for trace in self.retrieval_traces:
      if trace.source:
        payload["traces"].append(trace.__dict__)
    return payload

  def __parse_multimodal_input(self, input: list[dict]) -> str:
    """Parse multimodal input and return a string representation."""
    type_mapping = {
      "text": lambda item: item.get("text", ""),
      "image": lambda _: " <image>",
      "image_url": lambda _: " <image>",
      "file": lambda _: " <file>",
      "media": lambda _: " <file>",
      "document": lambda _: " <file>",
    }

    return "".join(
      type_mapping.get(item.get("type", ""), lambda item: f" <{item.get('type', '')}>")(item)
      for item in input
    )

  def __parse_multimodal_history(self, messages: list[dict]) -> list[dict]:
    # Parse the multimodal history and return a list of dictionaries
    parsed_history = []
    for message in messages:
      if isinstance(message["content"], list):
        parsed_content = self.__parse_multimodal_input(message["content"])
      else:
        parsed_content = message["content"]
      parsed_history.append({
        "role": message["role"],
        "content": parsed_content
      })
    return parsed_history

  def __is_message_generated(self, messages: list[BaseMessage]) -> bool:
    # Check if the last message is generated by the model
    if len(messages) == 0:
      return False
    last_user_message = f'<div class="llm__pill">{messages[-1].content}</div>'
    last_ai_message = messages[-2].content
    if last_user_message in last_ai_message:
      return "true"
    return "false"
```
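For orientation, `__send_interaction` posts the interaction together with a list of traces (the LLM trace plus one retrieval trace per document). An illustrative sketch of that payload's shape, with made-up values:

```python
# Shape of the body assembled by __prepare_payload(); values are illustrative.
example_payload = {
    "interaction": {
        "conversation_id": "thread-123",
        "input": "What is the capital of France?",
        "output": "Paris",
        "time_start": "2024-01-01T12:00:00+00:00",
        "time_end": "2024-01-01T12:00:02+00:00",
        "end_user": "thread-123",
        "tags": {"model": "gpt-4o", "generated": "false", "response_with_rag": "false"},
    },
    "traces": [
        {"model": "gpt-4o", "messages": [], "output": "Paris", "input_tokens": 42, "output_tokens": 3},
        # plus one NebulyRetrievalTrace dict per retrieved document when RAG was used
    ],
}
```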