waldiez 0.4.7__py3-none-any.whl → 0.4.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of waldiez might be problematic. Click here for more details.
- waldiez/__init__.py +5 -5
- waldiez/_version.py +1 -1
- waldiez/cli.py +112 -73
- waldiez/exporter.py +61 -19
- waldiez/exporting/__init__.py +25 -6
- waldiez/exporting/agent/__init__.py +7 -3
- waldiez/exporting/agent/code_execution.py +114 -0
- waldiez/exporting/agent/exporter.py +354 -0
- waldiez/exporting/agent/extras/__init__.py +15 -0
- waldiez/exporting/agent/extras/captain_agent_extras.py +315 -0
- waldiez/exporting/agent/extras/group/target.py +178 -0
- waldiez/exporting/agent/extras/group_manager_agent_extas.py +500 -0
- waldiez/exporting/agent/extras/group_member_extras.py +181 -0
- waldiez/exporting/agent/extras/handoffs/__init__.py +19 -0
- waldiez/exporting/agent/extras/handoffs/after_work.py +78 -0
- waldiez/exporting/agent/extras/handoffs/available.py +74 -0
- waldiez/exporting/agent/extras/handoffs/condition.py +158 -0
- waldiez/exporting/agent/extras/handoffs/handoff.py +171 -0
- waldiez/exporting/agent/extras/handoffs/target.py +189 -0
- waldiez/exporting/agent/extras/rag/__init__.py +10 -0
- waldiez/exporting/agent/{utils/rag_user/chroma_utils.py → extras/rag/chroma_extras.py} +16 -15
- waldiez/exporting/agent/{utils/rag_user/mongo_utils.py → extras/rag/mongo_extras.py} +10 -10
- waldiez/exporting/agent/{utils/rag_user/pgvector_utils.py → extras/rag/pgvector_extras.py} +13 -13
- waldiez/exporting/agent/{utils/rag_user/qdrant_utils.py → extras/rag/qdrant_extras.py} +13 -13
- waldiez/exporting/agent/{utils/rag_user/vector_db.py → extras/rag/vector_db_extras.py} +59 -46
- waldiez/exporting/agent/extras/rag_user_proxy_agent_extras.py +245 -0
- waldiez/exporting/agent/extras/reasoning_agent_extras.py +88 -0
- waldiez/exporting/agent/factory.py +95 -0
- waldiez/exporting/agent/processor.py +150 -0
- waldiez/exporting/agent/system_message.py +36 -0
- waldiez/exporting/agent/termination.py +50 -0
- waldiez/exporting/chats/__init__.py +7 -3
- waldiez/exporting/chats/exporter.py +97 -0
- waldiez/exporting/chats/factory.py +65 -0
- waldiez/exporting/chats/processor.py +226 -0
- waldiez/exporting/chats/utils/__init__.py +6 -5
- waldiez/exporting/chats/utils/common.py +11 -45
- waldiez/exporting/chats/utils/group.py +55 -0
- waldiez/exporting/chats/utils/nested.py +37 -52
- waldiez/exporting/chats/utils/sequential.py +72 -61
- waldiez/exporting/chats/utils/{single_chat.py → single.py} +48 -50
- waldiez/exporting/core/__init__.py +196 -0
- waldiez/exporting/core/constants.py +17 -0
- waldiez/exporting/core/content.py +69 -0
- waldiez/exporting/core/context.py +244 -0
- waldiez/exporting/core/enums.py +89 -0
- waldiez/exporting/core/errors.py +19 -0
- waldiez/exporting/core/exporter.py +390 -0
- waldiez/exporting/core/exporters.py +67 -0
- waldiez/exporting/core/extras/__init__.py +39 -0
- waldiez/exporting/core/extras/agent_extras/__init__.py +27 -0
- waldiez/exporting/core/extras/agent_extras/captain_extras.py +57 -0
- waldiez/exporting/core/extras/agent_extras/group_manager_extras.py +102 -0
- waldiez/exporting/core/extras/agent_extras/rag_user_extras.py +53 -0
- waldiez/exporting/core/extras/agent_extras/reasoning_extras.py +68 -0
- waldiez/exporting/core/extras/agent_extras/standard_extras.py +263 -0
- waldiez/exporting/core/extras/base.py +241 -0
- waldiez/exporting/core/extras/chat_extras.py +118 -0
- waldiez/exporting/core/extras/flow_extras.py +70 -0
- waldiez/exporting/core/extras/model_extras.py +73 -0
- waldiez/exporting/core/extras/path_resolver.py +93 -0
- waldiez/exporting/core/extras/serializer.py +138 -0
- waldiez/exporting/core/extras/tool_extras.py +82 -0
- waldiez/exporting/core/protocols.py +259 -0
- waldiez/exporting/core/result.py +705 -0
- waldiez/exporting/core/types.py +329 -0
- waldiez/exporting/core/utils/__init__.py +11 -0
- waldiez/exporting/core/utils/comment.py +33 -0
- waldiez/exporting/core/utils/llm_config.py +117 -0
- waldiez/exporting/core/validation.py +96 -0
- waldiez/exporting/flow/__init__.py +6 -2
- waldiez/exporting/flow/execution_generator.py +193 -0
- waldiez/exporting/flow/exporter.py +107 -0
- waldiez/exporting/flow/factory.py +94 -0
- waldiez/exporting/flow/file_generator.py +214 -0
- waldiez/exporting/flow/merger.py +387 -0
- waldiez/exporting/flow/orchestrator.py +411 -0
- waldiez/exporting/flow/utils/__init__.py +9 -36
- waldiez/exporting/flow/utils/common.py +206 -0
- waldiez/exporting/flow/utils/importing.py +373 -0
- waldiez/exporting/flow/utils/linting.py +200 -0
- waldiez/exporting/flow/utils/{logging_utils.py → logging.py} +23 -9
- waldiez/exporting/models/__init__.py +3 -1
- waldiez/exporting/models/exporter.py +233 -0
- waldiez/exporting/models/factory.py +66 -0
- waldiez/exporting/models/processor.py +139 -0
- waldiez/exporting/tools/__init__.py +11 -0
- waldiez/exporting/tools/exporter.py +207 -0
- waldiez/exporting/tools/factory.py +57 -0
- waldiez/exporting/tools/processor.py +248 -0
- waldiez/exporting/tools/registration.py +133 -0
- waldiez/io/__init__.py +128 -0
- waldiez/io/_ws.py +199 -0
- waldiez/io/models/__init__.py +60 -0
- waldiez/io/models/base.py +66 -0
- waldiez/io/models/constants.py +78 -0
- waldiez/io/models/content/__init__.py +23 -0
- waldiez/io/models/content/audio.py +43 -0
- waldiez/io/models/content/base.py +45 -0
- waldiez/io/models/content/file.py +43 -0
- waldiez/io/models/content/image.py +96 -0
- waldiez/io/models/content/text.py +37 -0
- waldiez/io/models/content/video.py +43 -0
- waldiez/io/models/user_input.py +269 -0
- waldiez/io/models/user_response.py +215 -0
- waldiez/io/mqtt.py +681 -0
- waldiez/io/redis.py +782 -0
- waldiez/io/structured.py +419 -0
- waldiez/io/utils.py +184 -0
- waldiez/io/ws.py +298 -0
- waldiez/logger.py +481 -0
- waldiez/models/__init__.py +108 -51
- waldiez/models/agents/__init__.py +34 -70
- waldiez/models/agents/agent/__init__.py +10 -4
- waldiez/models/agents/agent/agent.py +466 -65
- waldiez/models/agents/agent/agent_data.py +119 -47
- waldiez/models/agents/agent/agent_type.py +13 -2
- waldiez/models/agents/agent/code_execution.py +12 -12
- waldiez/models/agents/agent/human_input_mode.py +8 -0
- waldiez/models/agents/agent/{linked_skill.py → linked_tool.py} +7 -7
- waldiez/models/agents/agent/nested_chat.py +35 -7
- waldiez/models/agents/agent/termination_message.py +30 -22
- waldiez/models/agents/{swarm_agent → agent}/update_system_message.py +22 -22
- waldiez/models/agents/agents.py +58 -63
- waldiez/models/agents/assistant/assistant.py +4 -4
- waldiez/models/agents/assistant/assistant_data.py +13 -1
- waldiez/models/agents/{captain_agent → captain}/captain_agent.py +5 -5
- waldiez/models/agents/{captain_agent → captain}/captain_agent_data.py +5 -5
- waldiez/models/agents/extra_requirements.py +11 -16
- waldiez/models/agents/group_manager/group_manager.py +103 -13
- waldiez/models/agents/group_manager/group_manager_data.py +36 -14
- waldiez/models/agents/group_manager/speakers.py +77 -24
- waldiez/models/agents/{rag_user → rag_user_proxy}/__init__.py +16 -16
- waldiez/models/agents/rag_user_proxy/rag_user_proxy.py +64 -0
- waldiez/models/agents/{rag_user/rag_user_data.py → rag_user_proxy/rag_user_proxy_data.py} +6 -5
- waldiez/models/agents/{rag_user → rag_user_proxy}/retrieve_config.py +182 -114
- waldiez/models/agents/{rag_user → rag_user_proxy}/vector_db_config.py +13 -13
- waldiez/models/agents/reasoning/reasoning_agent.py +6 -6
- waldiez/models/agents/reasoning/reasoning_agent_data.py +110 -63
- waldiez/models/agents/reasoning/reasoning_agent_reason_config.py +38 -10
- waldiez/models/agents/user_proxy/user_proxy.py +11 -7
- waldiez/models/agents/user_proxy/user_proxy_data.py +2 -2
- waldiez/models/chat/__init__.py +2 -1
- waldiez/models/chat/chat.py +166 -87
- waldiez/models/chat/chat_data.py +99 -136
- waldiez/models/chat/chat_message.py +33 -23
- waldiez/models/chat/chat_nested.py +31 -30
- waldiez/models/chat/chat_summary.py +10 -8
- waldiez/models/common/__init__.py +52 -2
- waldiez/models/common/ag2_version.py +1 -1
- waldiez/models/common/base.py +38 -7
- waldiez/models/common/dict_utils.py +42 -17
- waldiez/models/common/handoff.py +459 -0
- waldiez/models/common/id_generator.py +19 -0
- waldiez/models/common/method_utils.py +130 -68
- waldiez/{exporting/base/utils → models/common}/naming.py +38 -61
- waldiez/models/common/waldiez_version.py +37 -0
- waldiez/models/flow/__init__.py +9 -2
- waldiez/models/flow/connection.py +18 -0
- waldiez/models/flow/flow.py +311 -215
- waldiez/models/flow/flow_data.py +207 -40
- waldiez/models/flow/info.py +85 -0
- waldiez/models/flow/naming.py +131 -0
- waldiez/models/model/__init__.py +7 -1
- waldiez/models/model/extra_requirements.py +3 -12
- waldiez/models/model/model.py +76 -21
- waldiez/models/model/model_data.py +108 -20
- waldiez/models/tool/__init__.py +16 -0
- waldiez/models/tool/extra_requirements.py +36 -0
- waldiez/models/{skill/skill.py → tool/tool.py} +88 -88
- waldiez/models/tool/tool_data.py +51 -0
- waldiez/models/tool/tool_type.py +8 -0
- waldiez/models/waldiez.py +97 -80
- waldiez/runner.py +114 -49
- waldiez/running/__init__.py +1 -1
- waldiez/running/environment.py +49 -68
- waldiez/running/gen_seq_diagram.py +16 -14
- waldiez/running/running.py +53 -34
- waldiez/utils/__init__.py +0 -4
- waldiez/utils/cli_extras/jupyter.py +5 -3
- waldiez/utils/cli_extras/runner.py +6 -4
- waldiez/utils/cli_extras/studio.py +6 -4
- waldiez/utils/conflict_checker.py +15 -9
- waldiez/utils/flaml_warnings.py +5 -5
- {waldiez-0.4.7.dist-info → waldiez-0.4.8.dist-info}/METADATA +235 -91
- waldiez-0.4.8.dist-info/RECORD +200 -0
- waldiez/exporting/agent/agent_exporter.py +0 -297
- waldiez/exporting/agent/utils/__init__.py +0 -23
- waldiez/exporting/agent/utils/captain_agent.py +0 -263
- waldiez/exporting/agent/utils/code_execution.py +0 -65
- waldiez/exporting/agent/utils/group_manager.py +0 -220
- waldiez/exporting/agent/utils/rag_user/__init__.py +0 -7
- waldiez/exporting/agent/utils/rag_user/rag_user.py +0 -209
- waldiez/exporting/agent/utils/reasoning.py +0 -36
- waldiez/exporting/agent/utils/swarm_agent.py +0 -469
- waldiez/exporting/agent/utils/teachability.py +0 -41
- waldiez/exporting/agent/utils/termination_message.py +0 -44
- waldiez/exporting/base/__init__.py +0 -25
- waldiez/exporting/base/agent_position.py +0 -75
- waldiez/exporting/base/base_exporter.py +0 -118
- waldiez/exporting/base/export_position.py +0 -48
- waldiez/exporting/base/import_position.py +0 -23
- waldiez/exporting/base/mixin.py +0 -137
- waldiez/exporting/base/utils/__init__.py +0 -18
- waldiez/exporting/base/utils/comments.py +0 -96
- waldiez/exporting/base/utils/path_check.py +0 -68
- waldiez/exporting/base/utils/to_string.py +0 -84
- waldiez/exporting/chats/chats_exporter.py +0 -240
- waldiez/exporting/chats/utils/swarm.py +0 -210
- waldiez/exporting/flow/flow_exporter.py +0 -528
- waldiez/exporting/flow/utils/agent_utils.py +0 -204
- waldiez/exporting/flow/utils/chat_utils.py +0 -71
- waldiez/exporting/flow/utils/def_main.py +0 -77
- waldiez/exporting/flow/utils/flow_content.py +0 -202
- waldiez/exporting/flow/utils/flow_names.py +0 -116
- waldiez/exporting/flow/utils/importing_utils.py +0 -227
- waldiez/exporting/models/models_exporter.py +0 -199
- waldiez/exporting/models/utils.py +0 -174
- waldiez/exporting/skills/__init__.py +0 -9
- waldiez/exporting/skills/skills_exporter.py +0 -176
- waldiez/exporting/skills/utils.py +0 -369
- waldiez/models/agents/agent/teachability.py +0 -70
- waldiez/models/agents/rag_user/rag_user.py +0 -60
- waldiez/models/agents/swarm_agent/__init__.py +0 -50
- waldiez/models/agents/swarm_agent/after_work.py +0 -179
- waldiez/models/agents/swarm_agent/on_condition.py +0 -105
- waldiez/models/agents/swarm_agent/on_condition_available.py +0 -142
- waldiez/models/agents/swarm_agent/on_condition_target.py +0 -40
- waldiez/models/agents/swarm_agent/swarm_agent.py +0 -107
- waldiez/models/agents/swarm_agent/swarm_agent_data.py +0 -124
- waldiez/models/flow/utils.py +0 -232
- waldiez/models/skill/__init__.py +0 -16
- waldiez/models/skill/extra_requirements.py +0 -36
- waldiez/models/skill/skill_data.py +0 -53
- waldiez/models/skill/skill_type.py +0 -8
- waldiez/utils/pysqlite3_checker.py +0 -308
- waldiez/utils/rdps_checker.py +0 -122
- waldiez-0.4.7.dist-info/RECORD +0 -149
- /waldiez/models/agents/{captain_agent → captain}/__init__.py +0 -0
- /waldiez/models/agents/{captain_agent → captain}/captain_agent_lib_entry.py +0 -0
- {waldiez-0.4.7.dist-info → waldiez-0.4.8.dist-info}/WHEEL +0 -0
- {waldiez-0.4.7.dist-info → waldiez-0.4.8.dist-info}/entry_points.txt +0 -0
- {waldiez-0.4.7.dist-info → waldiez-0.4.8.dist-info}/licenses/LICENSE +0 -0
- {waldiez-0.4.7.dist-info → waldiez-0.4.8.dist-info}/licenses/NOTICE.md +0 -0
|
@@ -4,21 +4,21 @@
|
|
|
4
4
|
|
|
5
5
|
import os
|
|
6
6
|
from pathlib import Path
|
|
7
|
-
from typing import
|
|
7
|
+
from typing import Optional, Union
|
|
8
8
|
|
|
9
9
|
from pydantic import Field, model_validator
|
|
10
10
|
from typing_extensions import Annotated, Literal, Self
|
|
11
11
|
|
|
12
12
|
from ...common import WaldiezBase, check_function, generate_function
|
|
13
|
-
from .vector_db_config import
|
|
13
|
+
from .vector_db_config import WaldiezRagUserProxyVectorDbConfig
|
|
14
14
|
|
|
15
|
-
|
|
15
|
+
WaldiezRagUserProxyTask = Literal["code", "qa", "default"]
|
|
16
16
|
"""Possible tasks for the retrieve chat."""
|
|
17
|
-
|
|
17
|
+
WaldiezRagUserProxyVectorDb = Literal["chroma", "pgvector", "mongodb", "qdrant"]
|
|
18
18
|
"""Possible vector dbs for the retrieve chat."""
|
|
19
|
-
|
|
19
|
+
WaldiezRagUserProxyChunkMode = Literal["multi_lines", "one_line"]
|
|
20
20
|
"""Possible chunk modes for the retrieve chat."""
|
|
21
|
-
|
|
21
|
+
WaldiezRagUserProxyModels: dict[WaldiezRagUserProxyVectorDb, str] = {
|
|
22
22
|
"chroma": "all-MiniLM-L6-v2",
|
|
23
23
|
"mongodb": "all-MiniLM-L6-v2",
|
|
24
24
|
"pgvector": "all-MiniLM-L6-v2",
|
|
@@ -26,8 +26,8 @@ WaldiezRagUserModels: Dict[WaldiezRagUserVectorDb, str] = {
|
|
|
26
26
|
}
|
|
27
27
|
|
|
28
28
|
CUSTOM_EMBEDDING_FUNCTION = "custom_embedding_function"
|
|
29
|
-
CUSTOM_EMBEDDING_FUNCTION_ARGS:
|
|
30
|
-
CUSTOM_EMBEDDING_FUNCTION_TYPES:
|
|
29
|
+
CUSTOM_EMBEDDING_FUNCTION_ARGS: list[str] = []
|
|
30
|
+
CUSTOM_EMBEDDING_FUNCTION_TYPES: tuple[list[str], str] = (
|
|
31
31
|
[],
|
|
32
32
|
"Callable[..., Any]",
|
|
33
33
|
)
|
|
@@ -48,7 +48,7 @@ CUSTOM_TEXT_SPLIT_FUNCTION_ARGS = [
|
|
|
48
48
|
]
|
|
49
49
|
CUSTOM_TEXT_SPLIT_FUNCTION_TYPES = (
|
|
50
50
|
["str", "int", "str", "bool", "int"],
|
|
51
|
-
"
|
|
51
|
+
"list[str]",
|
|
52
52
|
)
|
|
53
53
|
NOT_LOCAL = (
|
|
54
54
|
"http://",
|
|
@@ -56,10 +56,14 @@ NOT_LOCAL = (
|
|
|
56
56
|
"ftp://",
|
|
57
57
|
"ftps://",
|
|
58
58
|
"sftp://",
|
|
59
|
+
"hdfs",
|
|
60
|
+
"s3://",
|
|
61
|
+
"gs://",
|
|
62
|
+
"azure://",
|
|
59
63
|
)
|
|
60
64
|
|
|
61
65
|
|
|
62
|
-
class
|
|
66
|
+
class WaldiezRagUserProxyRetrieveConfig(WaldiezBase):
|
|
63
67
|
"""RAG user agent.
|
|
64
68
|
|
|
65
69
|
Attributes
|
|
@@ -74,7 +78,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
|
|
|
74
78
|
The vector db for the retrieve chat.
|
|
75
79
|
db_config : Annotated[WaldiezVectorDbConfig, Field]
|
|
76
80
|
The config for the selected vector db.
|
|
77
|
-
docs_path : Optional[Union[str,
|
|
81
|
+
docs_path : Optional[Union[str, list[str]]]
|
|
78
82
|
The path to the docs directory. It can also be the path to a single
|
|
79
83
|
file, the url to a single file or a list of directories, files and
|
|
80
84
|
urls. Default is None, which works only if the collection is already
|
|
@@ -147,7 +151,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
|
|
|
147
151
|
A custom function to split a string into a list of strings. Default is
|
|
148
152
|
None, will use the default function in autogen.retrieve_utils.
|
|
149
153
|
split_text_to_chunks.
|
|
150
|
-
custom_text_types : Optional[
|
|
154
|
+
custom_text_types : Optional[list[str]]
|
|
151
155
|
A list of file types to be processed. Default is autogen.retrieve_utils.
|
|
152
156
|
TEXT_FORMATS. This only applies to files under the directories in
|
|
153
157
|
docs_path. Explicitly included files and urls will be chunked
|
|
@@ -180,9 +184,9 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
|
|
|
180
184
|
"""
|
|
181
185
|
|
|
182
186
|
task: Annotated[
|
|
183
|
-
|
|
187
|
+
WaldiezRagUserProxyTask,
|
|
184
188
|
Field(
|
|
185
|
-
"default",
|
|
189
|
+
default="default",
|
|
186
190
|
title="Task",
|
|
187
191
|
description=(
|
|
188
192
|
"The task of the retrieve chat. "
|
|
@@ -193,25 +197,25 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
|
|
|
193
197
|
"the response."
|
|
194
198
|
),
|
|
195
199
|
),
|
|
196
|
-
]
|
|
200
|
+
] = "default"
|
|
197
201
|
vector_db: Annotated[
|
|
198
|
-
|
|
202
|
+
WaldiezRagUserProxyVectorDb,
|
|
199
203
|
Field(
|
|
200
|
-
"chroma",
|
|
204
|
+
default="chroma",
|
|
201
205
|
title="Vector DB",
|
|
202
206
|
description="The vector db for the retrieve chat.",
|
|
203
207
|
),
|
|
204
|
-
]
|
|
208
|
+
] = "chroma"
|
|
205
209
|
db_config: Annotated[
|
|
206
|
-
|
|
210
|
+
WaldiezRagUserProxyVectorDbConfig,
|
|
207
211
|
Field(
|
|
208
212
|
title="DB Config",
|
|
209
213
|
description="The config for the selected vector db.",
|
|
210
|
-
default_factory=
|
|
214
|
+
default_factory=WaldiezRagUserProxyVectorDbConfig,
|
|
211
215
|
),
|
|
212
216
|
]
|
|
213
217
|
docs_path: Annotated[
|
|
214
|
-
Optional[Union[str,
|
|
218
|
+
Optional[Union[str, list[str]]],
|
|
215
219
|
Field(
|
|
216
220
|
default=None,
|
|
217
221
|
title="Docs Path",
|
|
@@ -222,7 +226,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
|
|
|
222
226
|
"only if the collection is already created."
|
|
223
227
|
),
|
|
224
228
|
),
|
|
225
|
-
]
|
|
229
|
+
] = None
|
|
226
230
|
new_docs: Annotated[
|
|
227
231
|
bool,
|
|
228
232
|
Field(
|
|
@@ -236,7 +240,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
|
|
|
236
240
|
"hash value of the content."
|
|
237
241
|
),
|
|
238
242
|
),
|
|
239
|
-
]
|
|
243
|
+
] = True
|
|
240
244
|
model: Annotated[
|
|
241
245
|
Optional[str],
|
|
242
246
|
Field(
|
|
@@ -247,7 +251,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
|
|
|
247
251
|
"we check for models linked to the agent."
|
|
248
252
|
),
|
|
249
253
|
),
|
|
250
|
-
]
|
|
254
|
+
] = None
|
|
251
255
|
chunk_token_size: Annotated[
|
|
252
256
|
Optional[int],
|
|
253
257
|
Field(
|
|
@@ -259,7 +263,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
|
|
|
259
263
|
"will be used."
|
|
260
264
|
),
|
|
261
265
|
),
|
|
262
|
-
]
|
|
266
|
+
] = None
|
|
263
267
|
context_max_tokens: Annotated[
|
|
264
268
|
Optional[int],
|
|
265
269
|
Field(
|
|
@@ -271,9 +275,9 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
|
|
|
271
275
|
"will be used."
|
|
272
276
|
),
|
|
273
277
|
),
|
|
274
|
-
]
|
|
278
|
+
] = None
|
|
275
279
|
chunk_mode: Annotated[
|
|
276
|
-
|
|
280
|
+
WaldiezRagUserProxyChunkMode,
|
|
277
281
|
Field(
|
|
278
282
|
default="multi_lines",
|
|
279
283
|
title="Chunk Mode",
|
|
@@ -283,8 +287,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
|
|
|
283
287
|
"a default mode multi_lines will be used."
|
|
284
288
|
),
|
|
285
289
|
),
|
|
286
|
-
]
|
|
287
|
-
|
|
290
|
+
] = "multi_lines"
|
|
288
291
|
must_break_at_empty_line: Annotated[
|
|
289
292
|
bool,
|
|
290
293
|
Field(
|
|
@@ -295,7 +298,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
|
|
|
295
298
|
"If chunk_mode is 'one_line', this parameter will be ignored."
|
|
296
299
|
),
|
|
297
300
|
),
|
|
298
|
-
]
|
|
301
|
+
] = True
|
|
299
302
|
use_custom_embedding: Annotated[
|
|
300
303
|
bool,
|
|
301
304
|
Field(
|
|
@@ -307,7 +310,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
|
|
|
307
310
|
"provided."
|
|
308
311
|
),
|
|
309
312
|
),
|
|
310
|
-
]
|
|
313
|
+
] = False
|
|
311
314
|
embedding_function: Annotated[
|
|
312
315
|
Optional[str],
|
|
313
316
|
Field(
|
|
@@ -322,7 +325,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
|
|
|
322
325
|
"https://docs.trychroma.com/guides/embeddings."
|
|
323
326
|
),
|
|
324
327
|
),
|
|
325
|
-
]
|
|
328
|
+
] = None
|
|
326
329
|
customized_prompt: Annotated[
|
|
327
330
|
Optional[str],
|
|
328
331
|
Field(
|
|
@@ -332,7 +335,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
|
|
|
332
335
|
"The customized prompt for the retrieve chat. Default is None."
|
|
333
336
|
),
|
|
334
337
|
),
|
|
335
|
-
]
|
|
338
|
+
] = None
|
|
336
339
|
customized_answer_prefix: Annotated[
|
|
337
340
|
Optional[str],
|
|
338
341
|
Field(
|
|
@@ -344,7 +347,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
|
|
|
344
347
|
"not in the answer, Update Context will be triggered."
|
|
345
348
|
),
|
|
346
349
|
),
|
|
347
|
-
]
|
|
350
|
+
] = ""
|
|
348
351
|
update_context: Annotated[
|
|
349
352
|
bool,
|
|
350
353
|
Field(
|
|
@@ -355,7 +358,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
|
|
|
355
358
|
"retrieval. Default is True."
|
|
356
359
|
),
|
|
357
360
|
),
|
|
358
|
-
]
|
|
361
|
+
] = True
|
|
359
362
|
collection_name: Annotated[
|
|
360
363
|
str,
|
|
361
364
|
Field(
|
|
@@ -366,7 +369,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
|
|
|
366
369
|
"a default name autogen-docs will be used."
|
|
367
370
|
),
|
|
368
371
|
),
|
|
369
|
-
]
|
|
372
|
+
] = "autogen-docs"
|
|
370
373
|
get_or_create: Annotated[
|
|
371
374
|
bool,
|
|
372
375
|
Field(
|
|
@@ -376,7 +379,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
|
|
|
376
379
|
"Whether to get the collection if it exists. Default is False."
|
|
377
380
|
),
|
|
378
381
|
),
|
|
379
|
-
]
|
|
382
|
+
] = False
|
|
380
383
|
overwrite: Annotated[
|
|
381
384
|
bool,
|
|
382
385
|
Field(
|
|
@@ -394,7 +397,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
|
|
|
394
397
|
" otherwise it raise a ValueError."
|
|
395
398
|
),
|
|
396
399
|
),
|
|
397
|
-
]
|
|
400
|
+
] = False
|
|
398
401
|
use_custom_token_count: Annotated[
|
|
399
402
|
bool,
|
|
400
403
|
Field(
|
|
@@ -406,7 +409,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
|
|
|
406
409
|
"custom_token_count_function should be provided."
|
|
407
410
|
),
|
|
408
411
|
),
|
|
409
|
-
]
|
|
412
|
+
] = False
|
|
410
413
|
custom_token_count_function: Annotated[
|
|
411
414
|
Optional[str],
|
|
412
415
|
Field(
|
|
@@ -421,7 +424,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
|
|
|
421
424
|
"tiktoken, which may not be accurate for non-OpenAI models."
|
|
422
425
|
),
|
|
423
426
|
),
|
|
424
|
-
]
|
|
427
|
+
] = None
|
|
425
428
|
use_custom_text_split: Annotated[
|
|
426
429
|
bool,
|
|
427
430
|
Field(
|
|
@@ -433,7 +436,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
|
|
|
433
436
|
"custom_text_split_function should be provided."
|
|
434
437
|
),
|
|
435
438
|
),
|
|
436
|
-
]
|
|
439
|
+
] = False
|
|
437
440
|
custom_text_split_function: Annotated[
|
|
438
441
|
Optional[str],
|
|
439
442
|
Field(
|
|
@@ -445,9 +448,9 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
|
|
|
445
448
|
"autogen.retrieve_utils.split_text_to_chunks."
|
|
446
449
|
),
|
|
447
450
|
),
|
|
448
|
-
]
|
|
451
|
+
] = None
|
|
449
452
|
custom_text_types: Annotated[
|
|
450
|
-
Optional[
|
|
453
|
+
Optional[list[str]],
|
|
451
454
|
Field(
|
|
452
455
|
default=None,
|
|
453
456
|
title="Custom Text Types",
|
|
@@ -459,7 +462,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
|
|
|
459
462
|
"chunked regardless of their types."
|
|
460
463
|
),
|
|
461
464
|
),
|
|
462
|
-
]
|
|
465
|
+
] = None
|
|
463
466
|
recursive: Annotated[
|
|
464
467
|
bool,
|
|
465
468
|
Field(
|
|
@@ -470,7 +473,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
|
|
|
470
473
|
"Default is True."
|
|
471
474
|
),
|
|
472
475
|
),
|
|
473
|
-
]
|
|
476
|
+
] = True
|
|
474
477
|
distance_threshold: Annotated[
|
|
475
478
|
float,
|
|
476
479
|
Field(
|
|
@@ -482,7 +485,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
|
|
|
482
485
|
"Will be ignored if < 0. Default is -1."
|
|
483
486
|
),
|
|
484
487
|
),
|
|
485
|
-
]
|
|
488
|
+
] = -1
|
|
486
489
|
n_results: Annotated[
|
|
487
490
|
Optional[int],
|
|
488
491
|
Field(
|
|
@@ -494,7 +497,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
|
|
|
494
497
|
"Use None or <1 to return all results."
|
|
495
498
|
),
|
|
496
499
|
),
|
|
497
|
-
]
|
|
500
|
+
] = None
|
|
498
501
|
_embedding_function_string: Optional[str] = None
|
|
499
502
|
|
|
500
503
|
_token_count_function_string: Optional[str] = None
|
|
@@ -538,7 +541,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
|
|
|
538
541
|
self,
|
|
539
542
|
name_prefix: Optional[str] = None,
|
|
540
543
|
name_suffix: Optional[str] = None,
|
|
541
|
-
) ->
|
|
544
|
+
) -> tuple[str, str]:
|
|
542
545
|
"""Generate the custom embedding function.
|
|
543
546
|
|
|
544
547
|
Parameters
|
|
@@ -550,7 +553,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
|
|
|
550
553
|
|
|
551
554
|
Returns
|
|
552
555
|
-------
|
|
553
|
-
|
|
556
|
+
tuple[str, str]
|
|
554
557
|
The custom embedding function and the function name.
|
|
555
558
|
"""
|
|
556
559
|
function_name = CUSTOM_EMBEDDING_FUNCTION
|
|
@@ -572,7 +575,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
|
|
|
572
575
|
self,
|
|
573
576
|
name_prefix: Optional[str] = None,
|
|
574
577
|
name_suffix: Optional[str] = None,
|
|
575
|
-
) ->
|
|
578
|
+
) -> tuple[str, str]:
|
|
576
579
|
"""Generate the custom token count function.
|
|
577
580
|
|
|
578
581
|
Parameters
|
|
@@ -584,7 +587,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
|
|
|
584
587
|
|
|
585
588
|
Returns
|
|
586
589
|
-------
|
|
587
|
-
|
|
590
|
+
tuple[str, str]
|
|
588
591
|
The custom token count function and the function name.
|
|
589
592
|
"""
|
|
590
593
|
function_name = CUSTOM_TOKEN_COUNT_FUNCTION
|
|
@@ -606,7 +609,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
|
|
|
606
609
|
self,
|
|
607
610
|
name_prefix: Optional[str] = None,
|
|
608
611
|
name_suffix: Optional[str] = None,
|
|
609
|
-
) ->
|
|
612
|
+
) -> tuple[str, str]:
|
|
610
613
|
"""Generate the custom text split function.
|
|
611
614
|
|
|
612
615
|
Parameters
|
|
@@ -618,7 +621,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
|
|
|
618
621
|
|
|
619
622
|
Returns
|
|
620
623
|
-------
|
|
621
|
-
|
|
624
|
+
tuple[str, str]
|
|
622
625
|
The custom text split function and the function name.
|
|
623
626
|
"""
|
|
624
627
|
function_name = CUSTOM_TEXT_SPLIT_FUNCTION
|
|
@@ -715,35 +718,50 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
|
|
|
715
718
|
"""
|
|
716
719
|
if not self.docs_path:
|
|
717
720
|
return
|
|
718
|
-
|
|
721
|
+
|
|
722
|
+
# Normalize to list
|
|
719
723
|
doc_paths = (
|
|
720
724
|
[self.docs_path]
|
|
721
725
|
if isinstance(self.docs_path, str)
|
|
722
726
|
else self.docs_path
|
|
723
727
|
)
|
|
724
|
-
|
|
728
|
+
|
|
729
|
+
validated_paths: list[str] = []
|
|
730
|
+
|
|
725
731
|
for path in doc_paths:
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
if is_remote:
|
|
729
|
-
if not is_raw:
|
|
730
|
-
resolved = f'r"{resolved}"'
|
|
731
|
-
if resolved not in paths:
|
|
732
|
-
paths.append(resolved)
|
|
732
|
+
# Skip duplicates
|
|
733
|
+
if path in validated_paths:
|
|
733
734
|
continue
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
if
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
paths.append(resolved)
|
|
735
|
+
|
|
736
|
+
# Check if it's a remote path
|
|
737
|
+
is_remote = is_remote_path(path)
|
|
738
|
+
if is_remote:
|
|
739
|
+
# Remote paths: ensure proper raw string wrapping if needed
|
|
740
|
+
content = extract_raw_string_content(path)
|
|
741
|
+
validated_paths.append(f'r"{content}"')
|
|
742
742
|
continue
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
743
|
+
|
|
744
|
+
# Handle local paths
|
|
745
|
+
# First remove any file:// scheme
|
|
746
|
+
cleaned_path = remove_file_scheme(path)
|
|
747
|
+
content = extract_raw_string_content(cleaned_path)
|
|
748
|
+
|
|
749
|
+
# Determine if it's likely a folder
|
|
750
|
+
is_folder = string_represents_folder(content)
|
|
751
|
+
|
|
752
|
+
if is_folder:
|
|
753
|
+
validated_paths.append(f'r"{content}"')
|
|
754
|
+
else:
|
|
755
|
+
# Files: resolve and validate existence
|
|
756
|
+
try:
|
|
757
|
+
resolved_path = resolve_path(cleaned_path, must_exist=True)
|
|
758
|
+
validated_paths.append(resolved_path)
|
|
759
|
+
except ValueError as e:
|
|
760
|
+
raise ValueError(f"Invalid file path '{path}': {e}") from e
|
|
761
|
+
|
|
762
|
+
# remove dupes (but keep order)
|
|
763
|
+
validated_paths = list(dict.fromkeys(validated_paths))
|
|
764
|
+
self.docs_path = [path for path in validated_paths if path]
|
|
747
765
|
|
|
748
766
|
@model_validator(mode="after")
|
|
749
767
|
def validate_rag_user_data(self) -> Self:
|
|
@@ -756,7 +774,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
|
|
|
756
774
|
|
|
757
775
|
Returns
|
|
758
776
|
-------
|
|
759
|
-
|
|
777
|
+
WaldiezRagUserProxyData
|
|
760
778
|
The validated RAG user data.
|
|
761
779
|
"""
|
|
762
780
|
self.validate_custom_embedding_function()
|
|
@@ -764,12 +782,35 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
|
|
|
764
782
|
self.validate_custom_text_split_function()
|
|
765
783
|
self.validate_docs_path()
|
|
766
784
|
if not self.db_config.model:
|
|
767
|
-
self.db_config.model =
|
|
785
|
+
self.db_config.model = WaldiezRagUserProxyModels[self.vector_db]
|
|
768
786
|
if isinstance(self.n_results, int) and self.n_results < 1:
|
|
769
787
|
self.n_results = None
|
|
770
788
|
return self
|
|
771
789
|
|
|
772
790
|
|
|
791
|
+
def extract_raw_string_content(path: str) -> str:
|
|
792
|
+
"""Extract content from potential raw string formats.
|
|
793
|
+
|
|
794
|
+
Parameters
|
|
795
|
+
----------
|
|
796
|
+
path : str
|
|
797
|
+
The path that might be wrapped in raw string format.
|
|
798
|
+
|
|
799
|
+
Returns
|
|
800
|
+
-------
|
|
801
|
+
str
|
|
802
|
+
The actual content of the path, without raw string formatting.
|
|
803
|
+
"""
|
|
804
|
+
# Handle r"..." and r'...'
|
|
805
|
+
if path.startswith(('r"', "r'")) and len(path) > 3:
|
|
806
|
+
quote = path[1]
|
|
807
|
+
if path.endswith(quote):
|
|
808
|
+
return path[2:-1]
|
|
809
|
+
# Handle malformed raw strings (missing end quote)
|
|
810
|
+
return path[2:]
|
|
811
|
+
return path
|
|
812
|
+
|
|
813
|
+
|
|
773
814
|
def string_represents_folder(path: str) -> bool:
|
|
774
815
|
"""Check if a string represents a folder.
|
|
775
816
|
|
|
@@ -783,14 +824,27 @@ def string_represents_folder(path: str) -> bool:
|
|
|
783
824
|
bool
|
|
784
825
|
True if the path is likely a folder, False if it's likely a file.
|
|
785
826
|
"""
|
|
786
|
-
if
|
|
787
|
-
|
|
788
|
-
|
|
827
|
+
# Extract actual path content if wrapped
|
|
828
|
+
content = extract_raw_string_content(path)
|
|
829
|
+
|
|
830
|
+
# Explicit folder indicators
|
|
831
|
+
if content.endswith(("/", "\\", os.path.sep)):
|
|
789
832
|
return True
|
|
790
|
-
return not os.path.splitext(path)[1]
|
|
791
833
|
|
|
834
|
+
# Check if it actually exists and is a directory
|
|
835
|
+
try:
|
|
836
|
+
if os.path.isdir(content):
|
|
837
|
+
return True
|
|
838
|
+
except (OSError, ValueError): # pragma: no cover
|
|
839
|
+
pass
|
|
840
|
+
|
|
841
|
+
# Heuristic: no file extension likely means folder
|
|
842
|
+
# return not os.path.splitext(content)[1]
|
|
843
|
+
_, ext = os.path.splitext(path.rstrip("/\\"))
|
|
844
|
+
return not ext
|
|
792
845
|
|
|
793
|
-
|
|
846
|
+
|
|
847
|
+
def is_remote_path(path: str) -> bool:
|
|
794
848
|
"""Check if a path is a remote path.
|
|
795
849
|
|
|
796
850
|
Parameters
|
|
@@ -800,14 +854,14 @@ def is_remote_path(path: str) -> Tuple[bool, bool]:
|
|
|
800
854
|
|
|
801
855
|
Returns
|
|
802
856
|
-------
|
|
803
|
-
|
|
857
|
+
tuple[bool, bool]
|
|
804
858
|
If the path is a remote path and if it's a raw string.
|
|
805
859
|
"""
|
|
806
|
-
|
|
860
|
+
content = extract_raw_string_content(path)
|
|
807
861
|
for not_local in NOT_LOCAL:
|
|
808
|
-
if
|
|
809
|
-
return True
|
|
810
|
-
return False
|
|
862
|
+
if content.startswith((not_local, f'r"{not_local}', f"r'{not_local}")):
|
|
863
|
+
return True
|
|
864
|
+
return False
|
|
811
865
|
|
|
812
866
|
|
|
813
867
|
def remove_file_scheme(path: str) -> str:
|
|
@@ -823,54 +877,68 @@ def remove_file_scheme(path: str) -> str:
|
|
|
823
877
|
str
|
|
824
878
|
The path without the scheme.
|
|
825
879
|
"""
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
while
|
|
830
|
-
|
|
831
|
-
while resolved.startswith("file://"):
|
|
832
|
-
resolved = resolved[len("file://") :]
|
|
833
|
-
return resolved
|
|
880
|
+
content = extract_raw_string_content(path)
|
|
881
|
+
|
|
882
|
+
# Remove file:// prefix
|
|
883
|
+
while content.startswith("file://"):
|
|
884
|
+
content = content[len("file://") :]
|
|
834
885
|
|
|
886
|
+
return f'r"{content}"'
|
|
835
887
|
|
|
836
|
-
|
|
888
|
+
|
|
889
|
+
def resolve_path(path: str, must_exist: bool) -> str:
|
|
837
890
|
"""Try to resolve a path.
|
|
838
891
|
|
|
839
892
|
Parameters
|
|
840
893
|
----------
|
|
841
894
|
path : str
|
|
842
895
|
The path to resolve.
|
|
843
|
-
is_raw : bool
|
|
844
|
-
If the path is a raw string.
|
|
845
896
|
must_exist : bool
|
|
846
897
|
If the path must exist.
|
|
847
898
|
|
|
848
899
|
Returns
|
|
849
900
|
-------
|
|
850
|
-
|
|
851
|
-
The resolved path.
|
|
901
|
+
str
|
|
902
|
+
The resolved path, potentially wrapped in raw string format.
|
|
852
903
|
|
|
853
904
|
Raises
|
|
854
905
|
------
|
|
855
906
|
ValueError
|
|
856
907
|
If the path is not a valid local path.
|
|
857
908
|
"""
|
|
858
|
-
#
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
909
|
+
# Extract the actual path content
|
|
910
|
+
# if is_raw:
|
|
911
|
+
path_content = extract_raw_string_content(path)
|
|
912
|
+
# else:
|
|
913
|
+
# path_content = path
|
|
914
|
+
|
|
915
|
+
# Handle JSON-escaped backslashes
|
|
916
|
+
if "\\\\" in path_content: # pragma: no cover
|
|
917
|
+
path_content = path_content.replace("\\\\", "\\")
|
|
918
|
+
# pylint: disable=too-many-try-statements
|
|
862
919
|
try:
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
920
|
+
# Try to resolve the path
|
|
921
|
+
resolved = Path(path_content).resolve()
|
|
922
|
+
|
|
923
|
+
if must_exist and not resolved.exists():
|
|
924
|
+
raise ValueError(f"Path {path} does not exist.")
|
|
925
|
+
|
|
926
|
+
return f'r"{resolved}"'
|
|
927
|
+
|
|
928
|
+
except (
|
|
929
|
+
OSError,
|
|
930
|
+
UnicodeDecodeError,
|
|
931
|
+
ValueError,
|
|
932
|
+
) as error: # pragma: no cover
|
|
933
|
+
# Fallback: try as raw string for Windows compatibility
|
|
934
|
+
raw_version = f'r"{path_content}"'
|
|
867
935
|
try:
|
|
868
|
-
|
|
869
|
-
|
|
936
|
+
# Test if the path can be resolved when treated as raw
|
|
937
|
+
resolved = Path(raw_version).resolve()
|
|
938
|
+
if must_exist and not resolved.exists():
|
|
939
|
+
raise ValueError(f"Path {path} does not exist.") from error
|
|
940
|
+
return raw_version
|
|
941
|
+
except Exception:
|
|
870
942
|
raise ValueError(
|
|
871
|
-
f"Path {path} is not a valid local path
|
|
943
|
+
f"Path {path} is not a valid local path: {error}"
|
|
872
944
|
) from error
|
|
873
|
-
return raw_string
|
|
874
|
-
if not resolved.exists() and must_exist:
|
|
875
|
-
raise ValueError(f"Path {path} does not exist.")
|
|
876
|
-
return f'r"{resolved}"'
|