waldiez 0.4.7__py3-none-any.whl → 0.4.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of waldiez might be problematic. Click here for more details.

Files changed (248)
  1. waldiez/__init__.py +5 -5
  2. waldiez/_version.py +1 -1
  3. waldiez/cli.py +97 -102
  4. waldiez/exporter.py +61 -19
  5. waldiez/exporting/__init__.py +25 -6
  6. waldiez/exporting/agent/__init__.py +7 -3
  7. waldiez/exporting/agent/code_execution.py +114 -0
  8. waldiez/exporting/agent/exporter.py +354 -0
  9. waldiez/exporting/agent/extras/__init__.py +15 -0
  10. waldiez/exporting/agent/extras/captain_agent_extras.py +315 -0
  11. waldiez/exporting/agent/extras/group/target.py +178 -0
  12. waldiez/exporting/agent/extras/group_manager_agent_extas.py +500 -0
  13. waldiez/exporting/agent/extras/group_member_extras.py +181 -0
  14. waldiez/exporting/agent/extras/handoffs/__init__.py +19 -0
  15. waldiez/exporting/agent/extras/handoffs/after_work.py +78 -0
  16. waldiez/exporting/agent/extras/handoffs/available.py +74 -0
  17. waldiez/exporting/agent/extras/handoffs/condition.py +158 -0
  18. waldiez/exporting/agent/extras/handoffs/handoff.py +171 -0
  19. waldiez/exporting/agent/extras/handoffs/target.py +189 -0
  20. waldiez/exporting/agent/extras/rag/__init__.py +10 -0
  21. waldiez/exporting/agent/{utils/rag_user/chroma_utils.py → extras/rag/chroma_extras.py} +37 -24
  22. waldiez/exporting/agent/{utils/rag_user/mongo_utils.py → extras/rag/mongo_extras.py} +10 -10
  23. waldiez/exporting/agent/{utils/rag_user/pgvector_utils.py → extras/rag/pgvector_extras.py} +13 -13
  24. waldiez/exporting/agent/{utils/rag_user/qdrant_utils.py → extras/rag/qdrant_extras.py} +13 -13
  25. waldiez/exporting/agent/{utils/rag_user/vector_db.py → extras/rag/vector_db_extras.py} +59 -46
  26. waldiez/exporting/agent/extras/rag_user_proxy_agent_extras.py +245 -0
  27. waldiez/exporting/agent/extras/reasoning_agent_extras.py +88 -0
  28. waldiez/exporting/agent/factory.py +95 -0
  29. waldiez/exporting/agent/processor.py +150 -0
  30. waldiez/exporting/agent/system_message.py +36 -0
  31. waldiez/exporting/agent/termination.py +50 -0
  32. waldiez/exporting/chats/__init__.py +7 -3
  33. waldiez/exporting/chats/exporter.py +97 -0
  34. waldiez/exporting/chats/factory.py +65 -0
  35. waldiez/exporting/chats/processor.py +226 -0
  36. waldiez/exporting/chats/utils/__init__.py +6 -5
  37. waldiez/exporting/chats/utils/common.py +11 -45
  38. waldiez/exporting/chats/utils/group.py +55 -0
  39. waldiez/exporting/chats/utils/nested.py +37 -52
  40. waldiez/exporting/chats/utils/sequential.py +72 -61
  41. waldiez/exporting/chats/utils/{single_chat.py → single.py} +48 -50
  42. waldiez/exporting/core/__init__.py +196 -0
  43. waldiez/exporting/core/constants.py +17 -0
  44. waldiez/exporting/core/content.py +69 -0
  45. waldiez/exporting/core/context.py +244 -0
  46. waldiez/exporting/core/enums.py +89 -0
  47. waldiez/exporting/core/errors.py +19 -0
  48. waldiez/exporting/core/exporter.py +390 -0
  49. waldiez/exporting/core/exporters.py +67 -0
  50. waldiez/exporting/core/extras/__init__.py +39 -0
  51. waldiez/exporting/core/extras/agent_extras/__init__.py +27 -0
  52. waldiez/exporting/core/extras/agent_extras/captain_extras.py +57 -0
  53. waldiez/exporting/core/extras/agent_extras/group_manager_extras.py +102 -0
  54. waldiez/exporting/core/extras/agent_extras/rag_user_extras.py +53 -0
  55. waldiez/exporting/core/extras/agent_extras/reasoning_extras.py +68 -0
  56. waldiez/exporting/core/extras/agent_extras/standard_extras.py +263 -0
  57. waldiez/exporting/core/extras/base.py +241 -0
  58. waldiez/exporting/core/extras/chat_extras.py +118 -0
  59. waldiez/exporting/core/extras/flow_extras.py +70 -0
  60. waldiez/exporting/core/extras/model_extras.py +73 -0
  61. waldiez/exporting/core/extras/path_resolver.py +93 -0
  62. waldiez/exporting/core/extras/serializer.py +138 -0
  63. waldiez/exporting/core/extras/tool_extras.py +82 -0
  64. waldiez/exporting/core/protocols.py +259 -0
  65. waldiez/exporting/core/result.py +705 -0
  66. waldiez/exporting/core/types.py +329 -0
  67. waldiez/exporting/core/utils/__init__.py +11 -0
  68. waldiez/exporting/core/utils/comment.py +33 -0
  69. waldiez/exporting/core/utils/llm_config.py +117 -0
  70. waldiez/exporting/core/validation.py +96 -0
  71. waldiez/exporting/flow/__init__.py +6 -2
  72. waldiez/exporting/flow/execution_generator.py +193 -0
  73. waldiez/exporting/flow/exporter.py +107 -0
  74. waldiez/exporting/flow/factory.py +94 -0
  75. waldiez/exporting/flow/file_generator.py +214 -0
  76. waldiez/exporting/flow/merger.py +387 -0
  77. waldiez/exporting/flow/orchestrator.py +411 -0
  78. waldiez/exporting/flow/utils/__init__.py +9 -36
  79. waldiez/exporting/flow/utils/common.py +206 -0
  80. waldiez/exporting/flow/utils/importing.py +373 -0
  81. waldiez/exporting/flow/utils/linting.py +200 -0
  82. waldiez/exporting/flow/utils/{logging_utils.py → logging.py} +23 -9
  83. waldiez/exporting/models/__init__.py +3 -1
  84. waldiez/exporting/models/exporter.py +233 -0
  85. waldiez/exporting/models/factory.py +66 -0
  86. waldiez/exporting/models/processor.py +139 -0
  87. waldiez/exporting/tools/__init__.py +11 -0
  88. waldiez/exporting/tools/exporter.py +207 -0
  89. waldiez/exporting/tools/factory.py +57 -0
  90. waldiez/exporting/tools/processor.py +248 -0
  91. waldiez/exporting/tools/registration.py +133 -0
  92. waldiez/io/__init__.py +128 -0
  93. waldiez/io/_ws.py +199 -0
  94. waldiez/io/models/__init__.py +60 -0
  95. waldiez/io/models/base.py +66 -0
  96. waldiez/io/models/constants.py +78 -0
  97. waldiez/io/models/content/__init__.py +23 -0
  98. waldiez/io/models/content/audio.py +43 -0
  99. waldiez/io/models/content/base.py +45 -0
  100. waldiez/io/models/content/file.py +43 -0
  101. waldiez/io/models/content/image.py +96 -0
  102. waldiez/io/models/content/text.py +37 -0
  103. waldiez/io/models/content/video.py +43 -0
  104. waldiez/io/models/user_input.py +269 -0
  105. waldiez/io/models/user_response.py +215 -0
  106. waldiez/io/mqtt.py +681 -0
  107. waldiez/io/redis.py +782 -0
  108. waldiez/io/structured.py +439 -0
  109. waldiez/io/utils.py +184 -0
  110. waldiez/io/ws.py +298 -0
  111. waldiez/logger.py +481 -0
  112. waldiez/models/__init__.py +108 -51
  113. waldiez/models/agents/__init__.py +34 -70
  114. waldiez/models/agents/agent/__init__.py +10 -4
  115. waldiez/models/agents/agent/agent.py +466 -65
  116. waldiez/models/agents/agent/agent_data.py +119 -47
  117. waldiez/models/agents/agent/agent_type.py +13 -2
  118. waldiez/models/agents/agent/code_execution.py +12 -12
  119. waldiez/models/agents/agent/human_input_mode.py +8 -0
  120. waldiez/models/agents/agent/{linked_skill.py → linked_tool.py} +7 -7
  121. waldiez/models/agents/agent/nested_chat.py +35 -7
  122. waldiez/models/agents/agent/termination_message.py +30 -22
  123. waldiez/models/agents/{swarm_agent → agent}/update_system_message.py +22 -22
  124. waldiez/models/agents/agents.py +58 -63
  125. waldiez/models/agents/assistant/assistant.py +4 -4
  126. waldiez/models/agents/assistant/assistant_data.py +13 -1
  127. waldiez/models/agents/{captain_agent → captain}/captain_agent.py +5 -5
  128. waldiez/models/agents/{captain_agent → captain}/captain_agent_data.py +5 -5
  129. waldiez/models/agents/extra_requirements.py +11 -16
  130. waldiez/models/agents/group_manager/group_manager.py +103 -13
  131. waldiez/models/agents/group_manager/group_manager_data.py +36 -14
  132. waldiez/models/agents/group_manager/speakers.py +77 -24
  133. waldiez/models/agents/{rag_user → rag_user_proxy}/__init__.py +16 -16
  134. waldiez/models/agents/rag_user_proxy/rag_user_proxy.py +64 -0
  135. waldiez/models/agents/{rag_user/rag_user_data.py → rag_user_proxy/rag_user_proxy_data.py} +6 -5
  136. waldiez/models/agents/{rag_user → rag_user_proxy}/retrieve_config.py +182 -114
  137. waldiez/models/agents/{rag_user → rag_user_proxy}/vector_db_config.py +13 -13
  138. waldiez/models/agents/reasoning/reasoning_agent.py +6 -6
  139. waldiez/models/agents/reasoning/reasoning_agent_data.py +110 -63
  140. waldiez/models/agents/reasoning/reasoning_agent_reason_config.py +38 -10
  141. waldiez/models/agents/user_proxy/user_proxy.py +11 -7
  142. waldiez/models/agents/user_proxy/user_proxy_data.py +2 -2
  143. waldiez/models/chat/__init__.py +2 -1
  144. waldiez/models/chat/chat.py +166 -87
  145. waldiez/models/chat/chat_data.py +99 -136
  146. waldiez/models/chat/chat_message.py +33 -23
  147. waldiez/models/chat/chat_nested.py +31 -30
  148. waldiez/models/chat/chat_summary.py +10 -8
  149. waldiez/models/common/__init__.py +52 -2
  150. waldiez/models/common/ag2_version.py +1 -1
  151. waldiez/models/common/base.py +38 -7
  152. waldiez/models/common/dict_utils.py +42 -17
  153. waldiez/models/common/handoff.py +459 -0
  154. waldiez/models/common/id_generator.py +19 -0
  155. waldiez/models/common/method_utils.py +130 -68
  156. waldiez/{exporting/base/utils → models/common}/naming.py +38 -61
  157. waldiez/models/common/waldiez_version.py +37 -0
  158. waldiez/models/flow/__init__.py +9 -2
  159. waldiez/models/flow/connection.py +18 -0
  160. waldiez/models/flow/flow.py +311 -215
  161. waldiez/models/flow/flow_data.py +207 -40
  162. waldiez/models/flow/info.py +85 -0
  163. waldiez/models/flow/naming.py +131 -0
  164. waldiez/models/model/__init__.py +7 -1
  165. waldiez/models/model/extra_requirements.py +3 -12
  166. waldiez/models/model/model.py +76 -21
  167. waldiez/models/model/model_data.py +108 -20
  168. waldiez/models/tool/__init__.py +16 -0
  169. waldiez/models/tool/extra_requirements.py +36 -0
  170. waldiez/models/{skill/skill.py → tool/tool.py} +88 -88
  171. waldiez/models/tool/tool_data.py +51 -0
  172. waldiez/models/tool/tool_type.py +8 -0
  173. waldiez/models/waldiez.py +97 -80
  174. waldiez/runner.py +115 -61
  175. waldiez/running/__init__.py +13 -7
  176. waldiez/running/environment.py +49 -68
  177. waldiez/running/gen_seq_diagram.py +16 -14
  178. waldiez/running/post_run.py +119 -0
  179. waldiez/running/pre_run.py +149 -0
  180. waldiez/running/util.py +134 -0
  181. waldiez/utils/__init__.py +2 -4
  182. waldiez/utils/cli_extras/jupyter.py +5 -3
  183. waldiez/utils/cli_extras/runner.py +6 -4
  184. waldiez/utils/cli_extras/studio.py +6 -4
  185. waldiez/utils/conflict_checker.py +15 -9
  186. waldiez/utils/flaml_warnings.py +5 -5
  187. waldiez/utils/version.py +47 -0
  188. {waldiez-0.4.7.dist-info → waldiez-0.4.9.dist-info}/METADATA +235 -91
  189. waldiez-0.4.9.dist-info/RECORD +203 -0
  190. waldiez/exporting/agent/agent_exporter.py +0 -297
  191. waldiez/exporting/agent/utils/__init__.py +0 -23
  192. waldiez/exporting/agent/utils/captain_agent.py +0 -263
  193. waldiez/exporting/agent/utils/code_execution.py +0 -65
  194. waldiez/exporting/agent/utils/group_manager.py +0 -220
  195. waldiez/exporting/agent/utils/rag_user/__init__.py +0 -7
  196. waldiez/exporting/agent/utils/rag_user/rag_user.py +0 -209
  197. waldiez/exporting/agent/utils/reasoning.py +0 -36
  198. waldiez/exporting/agent/utils/swarm_agent.py +0 -469
  199. waldiez/exporting/agent/utils/teachability.py +0 -41
  200. waldiez/exporting/agent/utils/termination_message.py +0 -44
  201. waldiez/exporting/base/__init__.py +0 -25
  202. waldiez/exporting/base/agent_position.py +0 -75
  203. waldiez/exporting/base/base_exporter.py +0 -118
  204. waldiez/exporting/base/export_position.py +0 -48
  205. waldiez/exporting/base/import_position.py +0 -23
  206. waldiez/exporting/base/mixin.py +0 -137
  207. waldiez/exporting/base/utils/__init__.py +0 -18
  208. waldiez/exporting/base/utils/comments.py +0 -96
  209. waldiez/exporting/base/utils/path_check.py +0 -68
  210. waldiez/exporting/base/utils/to_string.py +0 -84
  211. waldiez/exporting/chats/chats_exporter.py +0 -240
  212. waldiez/exporting/chats/utils/swarm.py +0 -210
  213. waldiez/exporting/flow/flow_exporter.py +0 -528
  214. waldiez/exporting/flow/utils/agent_utils.py +0 -204
  215. waldiez/exporting/flow/utils/chat_utils.py +0 -71
  216. waldiez/exporting/flow/utils/def_main.py +0 -77
  217. waldiez/exporting/flow/utils/flow_content.py +0 -202
  218. waldiez/exporting/flow/utils/flow_names.py +0 -116
  219. waldiez/exporting/flow/utils/importing_utils.py +0 -227
  220. waldiez/exporting/models/models_exporter.py +0 -199
  221. waldiez/exporting/models/utils.py +0 -174
  222. waldiez/exporting/skills/__init__.py +0 -9
  223. waldiez/exporting/skills/skills_exporter.py +0 -176
  224. waldiez/exporting/skills/utils.py +0 -369
  225. waldiez/models/agents/agent/teachability.py +0 -70
  226. waldiez/models/agents/rag_user/rag_user.py +0 -60
  227. waldiez/models/agents/swarm_agent/__init__.py +0 -50
  228. waldiez/models/agents/swarm_agent/after_work.py +0 -179
  229. waldiez/models/agents/swarm_agent/on_condition.py +0 -105
  230. waldiez/models/agents/swarm_agent/on_condition_available.py +0 -142
  231. waldiez/models/agents/swarm_agent/on_condition_target.py +0 -40
  232. waldiez/models/agents/swarm_agent/swarm_agent.py +0 -107
  233. waldiez/models/agents/swarm_agent/swarm_agent_data.py +0 -124
  234. waldiez/models/flow/utils.py +0 -232
  235. waldiez/models/skill/__init__.py +0 -16
  236. waldiez/models/skill/extra_requirements.py +0 -36
  237. waldiez/models/skill/skill_data.py +0 -53
  238. waldiez/models/skill/skill_type.py +0 -8
  239. waldiez/running/running.py +0 -369
  240. waldiez/utils/pysqlite3_checker.py +0 -308
  241. waldiez/utils/rdps_checker.py +0 -122
  242. waldiez-0.4.7.dist-info/RECORD +0 -149
  243. /waldiez/models/agents/{captain_agent → captain}/__init__.py +0 -0
  244. /waldiez/models/agents/{captain_agent → captain}/captain_agent_lib_entry.py +0 -0
  245. {waldiez-0.4.7.dist-info → waldiez-0.4.9.dist-info}/WHEEL +0 -0
  246. {waldiez-0.4.7.dist-info → waldiez-0.4.9.dist-info}/entry_points.txt +0 -0
  247. {waldiez-0.4.7.dist-info → waldiez-0.4.9.dist-info}/licenses/LICENSE +0 -0
  248. {waldiez-0.4.7.dist-info → waldiez-0.4.9.dist-info}/licenses/NOTICE.md +0 -0
@@ -4,21 +4,21 @@
4
4
 
5
5
  import os
6
6
  from pathlib import Path
7
- from typing import Dict, List, Optional, Tuple, Union
7
+ from typing import Optional, Union
8
8
 
9
9
  from pydantic import Field, model_validator
10
10
  from typing_extensions import Annotated, Literal, Self
11
11
 
12
12
  from ...common import WaldiezBase, check_function, generate_function
13
- from .vector_db_config import WaldiezRagUserVectorDbConfig
13
+ from .vector_db_config import WaldiezRagUserProxyVectorDbConfig
14
14
 
15
- WaldiezRagUserTask = Literal["code", "qa", "default"]
15
+ WaldiezRagUserProxyTask = Literal["code", "qa", "default"]
16
16
  """Possible tasks for the retrieve chat."""
17
- WaldiezRagUserVectorDb = Literal["chroma", "pgvector", "mongodb", "qdrant"]
17
+ WaldiezRagUserProxyVectorDb = Literal["chroma", "pgvector", "mongodb", "qdrant"]
18
18
  """Possible vector dbs for the retrieve chat."""
19
- WaldiezRagUserChunkMode = Literal["multi_lines", "one_line"]
19
+ WaldiezRagUserProxyChunkMode = Literal["multi_lines", "one_line"]
20
20
  """Possible chunk modes for the retrieve chat."""
21
- WaldiezRagUserModels: Dict[WaldiezRagUserVectorDb, str] = {
21
+ WaldiezRagUserProxyModels: dict[WaldiezRagUserProxyVectorDb, str] = {
22
22
  "chroma": "all-MiniLM-L6-v2",
23
23
  "mongodb": "all-MiniLM-L6-v2",
24
24
  "pgvector": "all-MiniLM-L6-v2",
@@ -26,8 +26,8 @@ WaldiezRagUserModels: Dict[WaldiezRagUserVectorDb, str] = {
26
26
  }
27
27
 
28
28
  CUSTOM_EMBEDDING_FUNCTION = "custom_embedding_function"
29
- CUSTOM_EMBEDDING_FUNCTION_ARGS: List[str] = []
30
- CUSTOM_EMBEDDING_FUNCTION_TYPES: Tuple[List[str], str] = (
29
+ CUSTOM_EMBEDDING_FUNCTION_ARGS: list[str] = []
30
+ CUSTOM_EMBEDDING_FUNCTION_TYPES: tuple[list[str], str] = (
31
31
  [],
32
32
  "Callable[..., Any]",
33
33
  )
@@ -48,7 +48,7 @@ CUSTOM_TEXT_SPLIT_FUNCTION_ARGS = [
48
48
  ]
49
49
  CUSTOM_TEXT_SPLIT_FUNCTION_TYPES = (
50
50
  ["str", "int", "str", "bool", "int"],
51
- "List[str]",
51
+ "list[str]",
52
52
  )
53
53
  NOT_LOCAL = (
54
54
  "http://",
@@ -56,10 +56,14 @@ NOT_LOCAL = (
56
56
  "ftp://",
57
57
  "ftps://",
58
58
  "sftp://",
59
+ "hdfs",
60
+ "s3://",
61
+ "gs://",
62
+ "azure://",
59
63
  )
60
64
 
61
65
 
62
- class WaldiezRagUserRetrieveConfig(WaldiezBase):
66
+ class WaldiezRagUserProxyRetrieveConfig(WaldiezBase):
63
67
  """RAG user agent.
64
68
 
65
69
  Attributes
@@ -74,7 +78,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
74
78
  The vector db for the retrieve chat.
75
79
  db_config : Annotated[WaldiezVectorDbConfig, Field]
76
80
  The config for the selected vector db.
77
- docs_path : Optional[Union[str, List[str]]]
81
+ docs_path : Optional[Union[str, list[str]]]
78
82
  The path to the docs directory. It can also be the path to a single
79
83
  file, the url to a single file or a list of directories, files and
80
84
  urls. Default is None, which works only if the collection is already
@@ -147,7 +151,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
147
151
  A custom function to split a string into a list of strings. Default is
148
152
  None, will use the default function in autogen.retrieve_utils.
149
153
  split_text_to_chunks.
150
- custom_text_types : Optional[List[str]]
154
+ custom_text_types : Optional[list[str]]
151
155
  A list of file types to be processed. Default is autogen.retrieve_utils.
152
156
  TEXT_FORMATS. This only applies to files under the directories in
153
157
  docs_path. Explicitly included files and urls will be chunked
@@ -180,9 +184,9 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
180
184
  """
181
185
 
182
186
  task: Annotated[
183
- WaldiezRagUserTask,
187
+ WaldiezRagUserProxyTask,
184
188
  Field(
185
- "default",
189
+ default="default",
186
190
  title="Task",
187
191
  description=(
188
192
  "The task of the retrieve chat. "
@@ -193,25 +197,25 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
193
197
  "the response."
194
198
  ),
195
199
  ),
196
- ]
200
+ ] = "default"
197
201
  vector_db: Annotated[
198
- WaldiezRagUserVectorDb,
202
+ WaldiezRagUserProxyVectorDb,
199
203
  Field(
200
- "chroma",
204
+ default="chroma",
201
205
  title="Vector DB",
202
206
  description="The vector db for the retrieve chat.",
203
207
  ),
204
- ]
208
+ ] = "chroma"
205
209
  db_config: Annotated[
206
- WaldiezRagUserVectorDbConfig,
210
+ WaldiezRagUserProxyVectorDbConfig,
207
211
  Field(
208
212
  title="DB Config",
209
213
  description="The config for the selected vector db.",
210
- default_factory=WaldiezRagUserVectorDbConfig,
214
+ default_factory=WaldiezRagUserProxyVectorDbConfig,
211
215
  ),
212
216
  ]
213
217
  docs_path: Annotated[
214
- Optional[Union[str, List[str]]],
218
+ Optional[Union[str, list[str]]],
215
219
  Field(
216
220
  default=None,
217
221
  title="Docs Path",
@@ -222,7 +226,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
222
226
  "only if the collection is already created."
223
227
  ),
224
228
  ),
225
- ]
229
+ ] = None
226
230
  new_docs: Annotated[
227
231
  bool,
228
232
  Field(
@@ -236,7 +240,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
236
240
  "hash value of the content."
237
241
  ),
238
242
  ),
239
- ]
243
+ ] = True
240
244
  model: Annotated[
241
245
  Optional[str],
242
246
  Field(
@@ -247,7 +251,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
247
251
  "we check for models linked to the agent."
248
252
  ),
249
253
  ),
250
- ]
254
+ ] = None
251
255
  chunk_token_size: Annotated[
252
256
  Optional[int],
253
257
  Field(
@@ -259,7 +263,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
259
263
  "will be used."
260
264
  ),
261
265
  ),
262
- ]
266
+ ] = None
263
267
  context_max_tokens: Annotated[
264
268
  Optional[int],
265
269
  Field(
@@ -271,9 +275,9 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
271
275
  "will be used."
272
276
  ),
273
277
  ),
274
- ]
278
+ ] = None
275
279
  chunk_mode: Annotated[
276
- WaldiezRagUserChunkMode,
280
+ WaldiezRagUserProxyChunkMode,
277
281
  Field(
278
282
  default="multi_lines",
279
283
  title="Chunk Mode",
@@ -283,8 +287,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
283
287
  "a default mode multi_lines will be used."
284
288
  ),
285
289
  ),
286
- ]
287
-
290
+ ] = "multi_lines"
288
291
  must_break_at_empty_line: Annotated[
289
292
  bool,
290
293
  Field(
@@ -295,7 +298,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
295
298
  "If chunk_mode is 'one_line', this parameter will be ignored."
296
299
  ),
297
300
  ),
298
- ]
301
+ ] = True
299
302
  use_custom_embedding: Annotated[
300
303
  bool,
301
304
  Field(
@@ -307,7 +310,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
307
310
  "provided."
308
311
  ),
309
312
  ),
310
- ]
313
+ ] = False
311
314
  embedding_function: Annotated[
312
315
  Optional[str],
313
316
  Field(
@@ -322,7 +325,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
322
325
  "https://docs.trychroma.com/guides/embeddings."
323
326
  ),
324
327
  ),
325
- ]
328
+ ] = None
326
329
  customized_prompt: Annotated[
327
330
  Optional[str],
328
331
  Field(
@@ -332,7 +335,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
332
335
  "The customized prompt for the retrieve chat. Default is None."
333
336
  ),
334
337
  ),
335
- ]
338
+ ] = None
336
339
  customized_answer_prefix: Annotated[
337
340
  Optional[str],
338
341
  Field(
@@ -344,7 +347,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
344
347
  "not in the answer, Update Context will be triggered."
345
348
  ),
346
349
  ),
347
- ]
350
+ ] = ""
348
351
  update_context: Annotated[
349
352
  bool,
350
353
  Field(
@@ -355,7 +358,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
355
358
  "retrieval. Default is True."
356
359
  ),
357
360
  ),
358
- ]
361
+ ] = True
359
362
  collection_name: Annotated[
360
363
  str,
361
364
  Field(
@@ -366,7 +369,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
366
369
  "a default name autogen-docs will be used."
367
370
  ),
368
371
  ),
369
- ]
372
+ ] = "autogen-docs"
370
373
  get_or_create: Annotated[
371
374
  bool,
372
375
  Field(
@@ -376,7 +379,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
376
379
  "Whether to get the collection if it exists. Default is False."
377
380
  ),
378
381
  ),
379
- ]
382
+ ] = False
380
383
  overwrite: Annotated[
381
384
  bool,
382
385
  Field(
@@ -394,7 +397,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
394
397
  " otherwise it raise a ValueError."
395
398
  ),
396
399
  ),
397
- ]
400
+ ] = False
398
401
  use_custom_token_count: Annotated[
399
402
  bool,
400
403
  Field(
@@ -406,7 +409,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
406
409
  "custom_token_count_function should be provided."
407
410
  ),
408
411
  ),
409
- ]
412
+ ] = False
410
413
  custom_token_count_function: Annotated[
411
414
  Optional[str],
412
415
  Field(
@@ -421,7 +424,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
421
424
  "tiktoken, which may not be accurate for non-OpenAI models."
422
425
  ),
423
426
  ),
424
- ]
427
+ ] = None
425
428
  use_custom_text_split: Annotated[
426
429
  bool,
427
430
  Field(
@@ -433,7 +436,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
433
436
  "custom_text_split_function should be provided."
434
437
  ),
435
438
  ),
436
- ]
439
+ ] = False
437
440
  custom_text_split_function: Annotated[
438
441
  Optional[str],
439
442
  Field(
@@ -445,9 +448,9 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
445
448
  "autogen.retrieve_utils.split_text_to_chunks."
446
449
  ),
447
450
  ),
448
- ]
451
+ ] = None
449
452
  custom_text_types: Annotated[
450
- Optional[List[str]],
453
+ Optional[list[str]],
451
454
  Field(
452
455
  default=None,
453
456
  title="Custom Text Types",
@@ -459,7 +462,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
459
462
  "chunked regardless of their types."
460
463
  ),
461
464
  ),
462
- ]
465
+ ] = None
463
466
  recursive: Annotated[
464
467
  bool,
465
468
  Field(
@@ -470,7 +473,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
470
473
  "Default is True."
471
474
  ),
472
475
  ),
473
- ]
476
+ ] = True
474
477
  distance_threshold: Annotated[
475
478
  float,
476
479
  Field(
@@ -482,7 +485,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
482
485
  "Will be ignored if < 0. Default is -1."
483
486
  ),
484
487
  ),
485
- ]
488
+ ] = -1
486
489
  n_results: Annotated[
487
490
  Optional[int],
488
491
  Field(
@@ -494,7 +497,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
494
497
  "Use None or <1 to return all results."
495
498
  ),
496
499
  ),
497
- ]
500
+ ] = None
498
501
  _embedding_function_string: Optional[str] = None
499
502
 
500
503
  _token_count_function_string: Optional[str] = None
@@ -538,7 +541,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
538
541
  self,
539
542
  name_prefix: Optional[str] = None,
540
543
  name_suffix: Optional[str] = None,
541
- ) -> Tuple[str, str]:
544
+ ) -> tuple[str, str]:
542
545
  """Generate the custom embedding function.
543
546
 
544
547
  Parameters
@@ -550,7 +553,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
550
553
 
551
554
  Returns
552
555
  -------
553
- Tuple[str, str]
556
+ tuple[str, str]
554
557
  The custom embedding function and the function name.
555
558
  """
556
559
  function_name = CUSTOM_EMBEDDING_FUNCTION
@@ -572,7 +575,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
572
575
  self,
573
576
  name_prefix: Optional[str] = None,
574
577
  name_suffix: Optional[str] = None,
575
- ) -> Tuple[str, str]:
578
+ ) -> tuple[str, str]:
576
579
  """Generate the custom token count function.
577
580
 
578
581
  Parameters
@@ -584,7 +587,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
584
587
 
585
588
  Returns
586
589
  -------
587
- Tuple[str, str]
590
+ tuple[str, str]
588
591
  The custom token count function and the function name.
589
592
  """
590
593
  function_name = CUSTOM_TOKEN_COUNT_FUNCTION
@@ -606,7 +609,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
606
609
  self,
607
610
  name_prefix: Optional[str] = None,
608
611
  name_suffix: Optional[str] = None,
609
- ) -> Tuple[str, str]:
612
+ ) -> tuple[str, str]:
610
613
  """Generate the custom text split function.
611
614
 
612
615
  Parameters
@@ -618,7 +621,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
618
621
 
619
622
  Returns
620
623
  -------
621
- Tuple[str, str]
624
+ tuple[str, str]
622
625
  The custom text split function and the function name.
623
626
  """
624
627
  function_name = CUSTOM_TEXT_SPLIT_FUNCTION
@@ -715,35 +718,50 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
715
718
  """
716
719
  if not self.docs_path:
717
720
  return
718
- # if urls or directories ok, if files they should resolve
721
+
722
+ # Normalize to list
719
723
  doc_paths = (
720
724
  [self.docs_path]
721
725
  if isinstance(self.docs_path, str)
722
726
  else self.docs_path
723
727
  )
724
- paths: List[str] = []
728
+
729
+ validated_paths: list[str] = []
730
+
725
731
  for path in doc_paths:
726
- resolved = path
727
- is_remote, is_raw = is_remote_path(resolved)
728
- if is_remote:
729
- if not is_raw:
730
- resolved = f'r"{resolved}"'
731
- if resolved not in paths:
732
- paths.append(resolved)
732
+ # Skip duplicates
733
+ if path in validated_paths:
733
734
  continue
734
- resolved = remove_file_scheme(resolved)
735
- is_raw = resolved.startswith(("r'", 'r"'))
736
- maybe_folder = string_represents_folder(resolved)
737
- if maybe_folder:
738
- if not is_raw:
739
- resolved = f'r"{resolved}"'
740
- if resolved not in paths:
741
- paths.append(resolved)
735
+
736
+ # Check if it's a remote path
737
+ is_remote = is_remote_path(path)
738
+ if is_remote:
739
+ # Remote paths: ensure proper raw string wrapping if needed
740
+ content = extract_raw_string_content(path)
741
+ validated_paths.append(f'r"{content}"')
742
742
  continue
743
- resolved = resolve_path(resolved, is_raw, not maybe_folder)
744
- if resolved not in paths:
745
- paths.append(resolved)
746
- self.docs_path = paths
743
+
744
+ # Handle local paths
745
+ # First remove any file:// scheme
746
+ cleaned_path = remove_file_scheme(path)
747
+ content = extract_raw_string_content(cleaned_path)
748
+
749
+ # Determine if it's likely a folder
750
+ is_folder = string_represents_folder(content)
751
+
752
+ if is_folder:
753
+ validated_paths.append(f'r"{content}"')
754
+ else:
755
+ # Files: resolve and validate existence
756
+ try:
757
+ resolved_path = resolve_path(cleaned_path, must_exist=True)
758
+ validated_paths.append(resolved_path)
759
+ except ValueError as e:
760
+ raise ValueError(f"Invalid file path '{path}': {e}") from e
761
+
762
+ # remove dupes (but keep order)
763
+ validated_paths = list(dict.fromkeys(validated_paths))
764
+ self.docs_path = [path for path in validated_paths if path]
747
765
 
748
766
  @model_validator(mode="after")
749
767
  def validate_rag_user_data(self) -> Self:
@@ -756,7 +774,7 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
756
774
 
757
775
  Returns
758
776
  -------
759
- WaldiezRagUserData
777
+ WaldiezRagUserProxyData
760
778
  The validated RAG user data.
761
779
  """
762
780
  self.validate_custom_embedding_function()
@@ -764,12 +782,35 @@ class WaldiezRagUserRetrieveConfig(WaldiezBase):
764
782
  self.validate_custom_text_split_function()
765
783
  self.validate_docs_path()
766
784
  if not self.db_config.model:
767
- self.db_config.model = WaldiezRagUserModels[self.vector_db]
785
+ self.db_config.model = WaldiezRagUserProxyModels[self.vector_db]
768
786
  if isinstance(self.n_results, int) and self.n_results < 1:
769
787
  self.n_results = None
770
788
  return self
771
789
 
772
790
 
791
+ def extract_raw_string_content(path: str) -> str:
792
+ """Extract content from potential raw string formats.
793
+
794
+ Parameters
795
+ ----------
796
+ path : str
797
+ The path that might be wrapped in raw string format.
798
+
799
+ Returns
800
+ -------
801
+ str
802
+ The actual content of the path, without raw string formatting.
803
+ """
804
+ # Handle r"..." and r'...'
805
+ if path.startswith(('r"', "r'")) and len(path) > 3:
806
+ quote = path[1]
807
+ if path.endswith(quote):
808
+ return path[2:-1]
809
+ # Handle malformed raw strings (missing end quote)
810
+ return path[2:]
811
+ return path
812
+
813
+
773
814
  def string_represents_folder(path: str) -> bool:
774
815
  """Check if a string represents a folder.
775
816
 
@@ -783,14 +824,27 @@ def string_represents_folder(path: str) -> bool:
783
824
  bool
784
825
  True if the path is likely a folder, False if it's likely a file.
785
826
  """
786
- if path.endswith(os.path.sep):
787
- return True
788
- if os.path.isdir(path):
827
+ # Extract actual path content if wrapped
828
+ content = extract_raw_string_content(path)
829
+
830
+ # Explicit folder indicators
831
+ if content.endswith(("/", "\\", os.path.sep)):
789
832
  return True
790
- return not os.path.splitext(path)[1]
791
833
 
834
+ # Check if it actually exists and is a directory
835
+ try:
836
+ if os.path.isdir(content):
837
+ return True
838
+ except (OSError, ValueError): # pragma: no cover
839
+ pass
840
+
841
+ # Heuristic: no file extension likely means folder
842
+ # return not os.path.splitext(content)[1]
843
+ _, ext = os.path.splitext(path.rstrip("/\\"))
844
+ return not ext
792
845
 
793
- def is_remote_path(path: str) -> Tuple[bool, bool]:
846
+
847
+ def is_remote_path(path: str) -> bool:
794
848
  """Check if a path is a remote path.
795
849
 
796
850
  Parameters
@@ -800,14 +854,14 @@ def is_remote_path(path: str) -> Tuple[bool, bool]:
800
854
 
801
855
  Returns
802
856
  -------
803
- Tuple[bool, bool]
857
+ tuple[bool, bool]
804
858
  If the path is a remote path and if it's a raw string.
805
859
  """
806
- is_raw = path.startswith(("r'", 'r"'))
860
+ content = extract_raw_string_content(path)
807
861
  for not_local in NOT_LOCAL:
808
- if path.startswith((not_local, f'r"{not_local}', f"r'{not_local}")):
809
- return True, is_raw
810
- return False, is_raw
862
+ if content.startswith((not_local, f'r"{not_local}', f"r'{not_local}")):
863
+ return True
864
+ return False
811
865
 
812
866
 
813
867
  def remove_file_scheme(path: str) -> str:
@@ -823,54 +877,68 @@ def remove_file_scheme(path: str) -> str:
823
877
  str
824
878
  The path without the scheme.
825
879
  """
826
- resolved = str(path)
827
- while resolved.startswith('r"file://') and resolved.endswith('"'):
828
- resolved = resolved[len('r"file://') : -1]
829
- while resolved.startswith("r'file://") and resolved.endswith("'"):
830
- resolved = resolved[len("r'file://") : -1]
831
- while resolved.startswith("file://"):
832
- resolved = resolved[len("file://") :]
833
- return resolved
880
+ content = extract_raw_string_content(path)
881
+
882
+ # Remove file:// prefix
883
+ while content.startswith("file://"):
884
+ content = content[len("file://") :]
834
885
 
886
+ return f'r"{content}"'
835
887
 
836
- def resolve_path(path: str, is_raw: bool, must_exist: bool) -> str:
888
+
889
+ def resolve_path(path: str, must_exist: bool) -> str:
837
890
  """Try to resolve a path.
838
891
 
839
892
  Parameters
840
893
  ----------
841
894
  path : str
842
895
  The path to resolve.
843
- is_raw : bool
844
- If the path is a raw string.
845
896
  must_exist : bool
846
897
  If the path must exist.
847
898
 
848
899
  Returns
849
900
  -------
850
- Path
851
- The resolved path.
901
+ str
902
+ The resolved path, potentially wrapped in raw string format.
852
903
 
853
904
  Raises
854
905
  ------
855
906
  ValueError
856
907
  If the path is not a valid local path.
857
908
  """
858
- # pylint: disable=broad-except
859
- path_string = path
860
- if is_raw:
861
- path_string = path[2:-1]
909
+ # Extract the actual path content
910
+ # if is_raw:
911
+ path_content = extract_raw_string_content(path)
912
+ # else:
913
+ # path_content = path
914
+
915
+ # Handle JSON-escaped backslashes
916
+ if "\\\\" in path_content: # pragma: no cover
917
+ path_content = path_content.replace("\\\\", "\\")
918
+ # pylint: disable=too-many-try-statements
862
919
  try:
863
- resolved = Path(path_string).resolve()
864
- except BaseException as error: # pragma: no cover
865
- # check if 'r'... is needed
866
- raw_string = f'r"{path}"'
920
+ # Try to resolve the path
921
+ resolved = Path(path_content).resolve()
922
+
923
+ if must_exist and not resolved.exists():
924
+ raise ValueError(f"Path {path} does not exist.")
925
+
926
+ return f'r"{resolved}"'
927
+
928
+ except (
929
+ OSError,
930
+ UnicodeDecodeError,
931
+ ValueError,
932
+ ) as error: # pragma: no cover
933
+ # Fallback: try as raw string for Windows compatibility
934
+ raw_version = f'r"{path_content}"'
867
935
  try:
868
- Path(raw_string).resolve()
869
- except BaseException:
936
+ # Test if the path can be resolved when treated as raw
937
+ resolved = Path(raw_version).resolve()
938
+ if must_exist and not resolved.exists():
939
+ raise ValueError(f"Path {path} does not exist.") from error
940
+ return raw_version
941
+ except Exception:
870
942
  raise ValueError(
871
- f"Path {path} is not a valid local path."
943
+ f"Path {path} is not a valid local path: {error}"
872
944
  ) from error
873
- return raw_string
874
- if not resolved.exists() and must_exist:
875
- raise ValueError(f"Path {path} does not exist.")
876
- return f'r"{resolved}"'