waldiez 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of waldiez might be problematic. Click here for more details.

Files changed (94) hide show
  1. waldiez/__init__.py +15 -0
  2. waldiez/__main__.py +6 -0
  3. waldiez/_version.py +3 -0
  4. waldiez/cli.py +162 -0
  5. waldiez/exporter.py +293 -0
  6. waldiez/exporting/__init__.py +14 -0
  7. waldiez/exporting/agents/__init__.py +5 -0
  8. waldiez/exporting/agents/agent.py +229 -0
  9. waldiez/exporting/agents/agent_skills.py +67 -0
  10. waldiez/exporting/agents/code_execution.py +67 -0
  11. waldiez/exporting/agents/group_manager.py +209 -0
  12. waldiez/exporting/agents/llm_config.py +53 -0
  13. waldiez/exporting/agents/rag_user/__init__.py +5 -0
  14. waldiez/exporting/agents/rag_user/chroma_utils.py +134 -0
  15. waldiez/exporting/agents/rag_user/mongo_utils.py +83 -0
  16. waldiez/exporting/agents/rag_user/pgvector_utils.py +93 -0
  17. waldiez/exporting/agents/rag_user/qdrant_utils.py +112 -0
  18. waldiez/exporting/agents/rag_user/rag_user.py +165 -0
  19. waldiez/exporting/agents/rag_user/vector_db.py +119 -0
  20. waldiez/exporting/agents/teachability.py +37 -0
  21. waldiez/exporting/agents/termination_message.py +45 -0
  22. waldiez/exporting/chats/__init__.py +14 -0
  23. waldiez/exporting/chats/chats.py +46 -0
  24. waldiez/exporting/chats/helpers.py +395 -0
  25. waldiez/exporting/chats/nested.py +264 -0
  26. waldiez/exporting/flow/__init__.py +5 -0
  27. waldiez/exporting/flow/def_main.py +37 -0
  28. waldiez/exporting/flow/flow.py +185 -0
  29. waldiez/exporting/models/__init__.py +193 -0
  30. waldiez/exporting/skills/__init__.py +128 -0
  31. waldiez/exporting/utils/__init__.py +34 -0
  32. waldiez/exporting/utils/comments.py +136 -0
  33. waldiez/exporting/utils/importing.py +267 -0
  34. waldiez/exporting/utils/logging_utils.py +203 -0
  35. waldiez/exporting/utils/method_utils.py +35 -0
  36. waldiez/exporting/utils/naming.py +127 -0
  37. waldiez/exporting/utils/object_string.py +81 -0
  38. waldiez/io_stream.py +181 -0
  39. waldiez/models/__init__.py +107 -0
  40. waldiez/models/agents/__init__.py +65 -0
  41. waldiez/models/agents/agent/__init__.py +21 -0
  42. waldiez/models/agents/agent/agent.py +190 -0
  43. waldiez/models/agents/agent/agent_data.py +162 -0
  44. waldiez/models/agents/agent/code_execution.py +71 -0
  45. waldiez/models/agents/agent/linked_skill.py +30 -0
  46. waldiez/models/agents/agent/nested_chat.py +73 -0
  47. waldiez/models/agents/agent/teachability.py +68 -0
  48. waldiez/models/agents/agent/termination_message.py +167 -0
  49. waldiez/models/agents/agents.py +129 -0
  50. waldiez/models/agents/assistant/__init__.py +6 -0
  51. waldiez/models/agents/assistant/assistant.py +41 -0
  52. waldiez/models/agents/assistant/assistant_data.py +29 -0
  53. waldiez/models/agents/group_manager/__init__.py +19 -0
  54. waldiez/models/agents/group_manager/group_manager.py +87 -0
  55. waldiez/models/agents/group_manager/group_manager_data.py +91 -0
  56. waldiez/models/agents/group_manager/speakers.py +211 -0
  57. waldiez/models/agents/rag_user/__init__.py +26 -0
  58. waldiez/models/agents/rag_user/rag_user.py +58 -0
  59. waldiez/models/agents/rag_user/rag_user_data.py +32 -0
  60. waldiez/models/agents/rag_user/retrieve_config.py +592 -0
  61. waldiez/models/agents/rag_user/vector_db_config.py +162 -0
  62. waldiez/models/agents/user_proxy/__init__.py +6 -0
  63. waldiez/models/agents/user_proxy/user_proxy.py +41 -0
  64. waldiez/models/agents/user_proxy/user_proxy_data.py +30 -0
  65. waldiez/models/chat/__init__.py +22 -0
  66. waldiez/models/chat/chat.py +129 -0
  67. waldiez/models/chat/chat_data.py +326 -0
  68. waldiez/models/chat/chat_message.py +304 -0
  69. waldiez/models/chat/chat_nested.py +160 -0
  70. waldiez/models/chat/chat_summary.py +110 -0
  71. waldiez/models/common/__init__.py +38 -0
  72. waldiez/models/common/base.py +63 -0
  73. waldiez/models/common/method_utils.py +165 -0
  74. waldiez/models/flow/__init__.py +9 -0
  75. waldiez/models/flow/flow.py +302 -0
  76. waldiez/models/flow/flow_data.py +87 -0
  77. waldiez/models/model/__init__.py +11 -0
  78. waldiez/models/model/model.py +169 -0
  79. waldiez/models/model/model_data.py +86 -0
  80. waldiez/models/skill/__init__.py +9 -0
  81. waldiez/models/skill/skill.py +129 -0
  82. waldiez/models/skill/skill_data.py +37 -0
  83. waldiez/models/waldiez.py +301 -0
  84. waldiez/py.typed +0 -0
  85. waldiez/runner.py +304 -0
  86. waldiez/stream/__init__.py +7 -0
  87. waldiez/stream/consumer.py +139 -0
  88. waldiez/stream/provider.py +339 -0
  89. waldiez/stream/server.py +412 -0
  90. waldiez-0.1.0.dist-info/METADATA +181 -0
  91. waldiez-0.1.0.dist-info/RECORD +94 -0
  92. waldiez-0.1.0.dist-info/WHEEL +4 -0
  93. waldiez-0.1.0.dist-info/entry_points.txt +2 -0
  94. waldiez-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,592 @@
1
+ """RAG user agent retrieve config."""
2
+
3
+ from typing import Dict, List, Optional, Union
4
+
5
+ from pydantic import ConfigDict, Field, model_validator
6
+ from pydantic.alias_generators import to_camel
7
+ from typing_extensions import Annotated, Literal, Self
8
+
9
+ from ...common import WaldiezBase, WaldiezMethodName, check_function
10
+ from .vector_db_config import WaldiezRagUserVectorDbConfig
11
+
12
# Closed sets of values accepted by the retrieve config fields.
WaldiezRagUserTask = Literal["code", "qa", "default"]
WaldiezRagUserVectorDb = Literal["chroma", "pgvector", "mongodb", "qdrant"]
WaldiezRagUserChunkMode = Literal["multi_lines", "one_line"]

# Embedding model name per vector db. The retrieve config validator
# falls back to this mapping when `db_config.model` is not set.
WaldiezRagUserModels: Dict[WaldiezRagUserVectorDb, str] = {
    "chroma": "all-MiniLM-L6-v2",
    "mongodb": "all-MiniLM-L6-v2",
    "pgvector": "all-MiniLM-L6-v2",
    "qdrant": "BAAI/bge-small-en-v1.5",
}
21
+
22
+
23
class WaldiezRagUserRetrieveConfig(WaldiezBase):
    """RAG user agent retrieve config.

    Attributes
    ----------
    task : Literal["code", "qa", "default"]
        The task of the retrieve chat.
        Possible values are 'code', 'qa' and 'default'.
        System prompt will be different for different tasks.
        The default value is default, which supports both code and qa,
        and provides source information in the end of the response.
    vector_db : Literal["chroma", "pgvector", "mongodb", "qdrant"]
        The vector db for the retrieve chat. Default is 'chroma'.
    db_config : WaldiezRagUserVectorDbConfig
        The config for the selected vector db. If it has no model set,
        validation fills in the entry of WaldiezRagUserModels that
        matches vector_db.
    docs_path : Optional[Union[str, List[str]]]
        The path to the docs directory. It can also be the path to a single
        file, the url to a single file or a list of directories, files and
        urls. Default is None, which works only if the collection is already
        created.
    new_docs : bool
        When True, only adds new documents to the collection; when False,
        updates existing documents and adds new ones. Default is True.
        Document id is used to determine if a document is new or existing.
        By default, the id is the hash value of the content.
    model : Optional[str]
        The model to use for the retrieve chat. If not provided, the
        models linked to the agent are checked.
    chunk_token_size : Optional[int]
        The chunk token size for the retrieve chat. If key not provided, a
        default size max_tokens * 0.4 will be used.
    context_max_tokens : Optional[int]
        The context max token size for the retrieve chat. If key not provided,
        a default size max_tokens * 0.8 will be used.
    chunk_mode : Literal["multi_lines", "one_line"]
        The chunk mode for the retrieve chat. Possible values are 'multi_lines'
        and 'one_line'. If key not provided, a default mode multi_lines will be
        used.
    must_break_at_empty_line : bool
        Chunk will only break at empty line if True. Default is True. If
        chunk_mode is 'one_line', this parameter will be ignored.
    use_custom_embedding : bool
        Whether to use custom embedding for the retrieve chat. Default is False.
        If True, the embedding_function should be provided.
    embedding_function : Optional[str]
        The embedding function for creating the vector db. Default is None,
        SentenceTransformer with the given embedding_model will be used. If
        you want to use OpenAI, Cohere, HuggingFace or other embedding
        functions, you can pass it here, follow the examples in
        https://docs.trychroma.com/guides/embeddings.
    customized_prompt : Optional[str]
        The customized prompt for the retrieve chat. Default is None.
    customized_answer_prefix : Optional[str]
        The customized answer prefix for the retrieve chat. Default is ''. If
        not '' and the customized_answer_prefix is not in the answer, Update
        Context will be triggered.
    update_context : bool
        If False, will not apply Update Context for interactive retrieval.
        Default is True.
    collection_name : str
        The name of the collection. If key not provided, a default name
        autogen-docs will be used.
    get_or_create : bool
        Whether to get the collection if it exists. Default is False.
    overwrite : bool
        Whether to overwrite the collection if it exists. Default is False.
        Case 1. if the collection does not exist, create the collection. Case
        2. the collection exists, if overwrite is True, it will overwrite the
        collection. Case 3. the collection exists and overwrite is False, if
        get_or_create is True, it will get the collection, otherwise it raise a
        ValueError.
    use_custom_token_count : bool
        Whether to use custom token count function for the retrieve chat.
        Default is False. If True, the custom_token_count_function should be
        provided.
    custom_token_count_function : Optional[str]
        A custom function to count the number of tokens in a string. The
        function should take (text:str, model:str) as input and return the
        token_count(int). the retrieve_config['model'] will be passed in the
        function. Default is autogen.token_count_utils.count_token that uses
        tiktoken, which may not be accurate for non-OpenAI models.
    use_custom_text_split : bool
        Whether to use custom text split function for the retrieve chat. Default
        is False. If True, the custom_text_split_function should be provided.
    custom_text_split_function : Optional[str]
        A custom function to split a string into a list of strings. Default is
        None, will use the default function in autogen.retrieve_utils.
        split_text_to_chunks.
    custom_text_types : Optional[List[str]]
        A list of file types to be processed. Default is autogen.retrieve_utils.
        TEXT_FORMATS. This only applies to files under the directories in
        docs_path. Explicitly included files and urls will be chunked
        regardless of their types.
    recursive : bool
        Whether to search documents recursively in the docs_path. Default is
        True.
    distance_threshold : float
        The threshold for the distance score, only distance smaller than it
        will be returned. Will be ignored if < 0. Default is -1.
    n_results : Optional[int]
        The number of results to return. Default is None, which will return
        all results. Integer values smaller than 1 are replaced with None
        during validation.
    embedding_function_string : Optional[str]
        Read-only. The embedding function string
        (if use_custom_embedding is True).
    token_count_function_string : Optional[str]
        Read-only. The token count function string
        (if use_custom_token_count is True).
    text_split_function_string : Optional[str]
        Read-only. The text split function string
        (if use_custom_text_split is True).

    Functions
    ---------
    validate_custom_embedding_function
        Validate the custom embedding function.
    validate_custom_token_count_function
        Validate the custom token count function.
    validate_custom_text_split_function
        Validate the custom text split function.
    validate_rag_user_data
        Validate the RAG user data.
    """

    model_config = ConfigDict(
        extra="forbid",
        alias_generator=to_camel,
        populate_by_name=True,
        frozen=False,
    )

    task: Annotated[
        WaldiezRagUserTask,
        Field(
            "default",
            title="Task",
            description=(
                "The task of the retrieve chat. "
                "Possible values are 'code', 'qa' and 'default'. "
                "System prompt will be different for different tasks. "
                "The default value is default, which supports both code, "
                "and qa and provides source information in the end of "
                "the response."
            ),
        ),
    ]
    vector_db: Annotated[
        WaldiezRagUserVectorDb,
        Field(
            "chroma",
            title="Vector DB",
            description="The vector db for the retrieve chat.",
        ),
    ]
    db_config: Annotated[
        WaldiezRagUserVectorDbConfig,
        Field(
            title="DB Config",
            description="The config for the selected vector db.",
            default_factory=WaldiezRagUserVectorDbConfig,
        ),
    ]
    docs_path: Annotated[
        Optional[Union[str, List[str]]],
        Field(
            default=None,
            title="Docs Path",
            description=(
                "The path to the docs directory. It can also be the path to "
                "a single file, the url to a single file or a list of "
                "directories, files and urls. Default is None, which works "
                "only if the collection is already created."
            ),
        ),
    ]
    new_docs: Annotated[
        bool,
        Field(
            default=True,
            title="New Docs",
            description=(
                "When True, only adds new documents to the collection; "
                "when False, updates existing documents and adds new ones. "
                "Default is True. Document id is used to determine if a "
                "document is new or existing. By default, the id is the "
                "hash value of the content."
            ),
        ),
    ]
    model: Annotated[
        Optional[str],
        Field(
            default=None,
            title="Model",
            description=(
                "The model to use for the retrieve chat. If key not provided, "
                "we check for models linked to the agent."
            ),
        ),
    ]
    chunk_token_size: Annotated[
        Optional[int],
        Field(
            default=None,
            title="Chunk Token Size",
            description=(
                "The chunk token size for the retrieve chat. "
                "If key not provided, a default size max_tokens * 0.4 "
                "will be used."
            ),
        ),
    ]
    context_max_tokens: Annotated[
        Optional[int],
        Field(
            default=None,
            title="Context Max Tokens",
            description=(
                "The context max token size for the retrieve chat. "
                "If key not provided, a default size max_tokens * 0.8 "
                "will be used."
            ),
        ),
    ]
    chunk_mode: Annotated[
        WaldiezRagUserChunkMode,
        Field(
            default="multi_lines",
            title="Chunk Mode",
            description=(
                "The chunk mode for the retrieve chat. Possible values are "
                "'multi_lines' and 'one_line'. If key not provided, "
                "a default mode multi_lines will be used."
            ),
        ),
    ]

    must_break_at_empty_line: Annotated[
        bool,
        Field(
            default=True,
            title="Must Break at Empty Line",
            description=(
                "Chunk will only break at empty line if True. Default is True. "
                "If chunk_mode is 'one_line', this parameter will be ignored."
            ),
        ),
    ]
    use_custom_embedding: Annotated[
        bool,
        Field(
            default=False,
            title="Use Custom Embedding",
            description=(
                "Whether to use custom embedding for the retrieve chat. "
                "Default is False. If True, the embedding_function should be "
                "provided."
            ),
        ),
    ]
    embedding_function: Annotated[
        Optional[str],
        Field(
            default=None,
            title="Embedding Function",
            description=(
                "The embedding function for creating the vector db. "
                "Default is None, SentenceTransformer with the given "
                "embedding_model will be used. If you want to use OpenAI, "
                "Cohere, HuggingFace or other embedding functions, "
                "you can pass it here, follow the examples in "
                "https://docs.trychroma.com/guides/embeddings."
            ),
        ),
    ]
    customized_prompt: Annotated[
        Optional[str],
        Field(
            default=None,
            title="Customized Prompt",
            description=(
                "The customized prompt for the retrieve chat. Default is None."
            ),
        ),
    ]
    customized_answer_prefix: Annotated[
        Optional[str],
        Field(
            default="",
            title="Customized Answer Prefix",
            description=(
                "The customized answer prefix for the retrieve chat. "
                "Default is ''. If not '' and the customized_answer_prefix is "
                "not in the answer, Update Context will be triggered."
            ),
        ),
    ]
    update_context: Annotated[
        bool,
        Field(
            default=True,
            title="Update Context",
            description=(
                "If False, will not apply Update Context for interactive "
                "retrieval. Default is True."
            ),
        ),
    ]
    collection_name: Annotated[
        str,
        Field(
            default="autogen-docs",
            title="Collection Name",
            description=(
                "The name of the collection. If key not provided, "
                "a default name autogen-docs will be used."
            ),
        ),
    ]
    get_or_create: Annotated[
        bool,
        Field(
            default=False,
            title="Get or Create",
            description=(
                "Whether to get the collection if it exists. Default is False."
            ),
        ),
    ]
    overwrite: Annotated[
        bool,
        Field(
            default=False,
            title="Overwrite",
            description=(
                "Whether to overwrite the collection if it exists. "
                "Default is False. "
                "Case 1. if the collection does not exist,"
                " create the collection. "
                "Case 2. the collection exists, if overwrite is True,"
                " it will overwrite the collection. "
                "Case 3. the collection exists and overwrite is False, if"
                " get_or_create is True, it will get the collection,"
                " otherwise it raise a ValueError."
            ),
        ),
    ]
    use_custom_token_count: Annotated[
        bool,
        Field(
            default=False,
            title="Use Custom Token Count",
            description=(
                "Whether to use custom token count function for the retrieve "
                "chat. Default is False. If True, the "
                "custom_token_count_function should be provided."
            ),
        ),
    ]
    custom_token_count_function: Annotated[
        Optional[str],
        Field(
            default=None,
            title="Custom Token Count Function",
            description=(
                "A custom function to count the number of tokens in a string. "
                "The function should take (text:str, model:str) as input "
                "and return the token_count(int). the retrieve_config['model'] "
                "will be passed in the function. "
                "Default is autogen.token_count_utils.count_token that uses "
                "tiktoken, which may not be accurate for non-OpenAI models."
            ),
        ),
    ]
    use_custom_text_split: Annotated[
        bool,
        Field(
            default=False,
            title="Use Custom Text Split",
            description=(
                "Whether to use custom text split function for the retrieve "
                "chat. Default is False. If True, the "
                "custom_text_split_function should be provided."
            ),
        ),
    ]
    custom_text_split_function: Annotated[
        Optional[str],
        Field(
            default=None,
            title="Custom Text Split Function",
            description=(
                "A custom function to split a string into a list of strings. "
                "Default is None, will use the default function in "
                "autogen.retrieve_utils.split_text_to_chunks."
            ),
        ),
    ]
    custom_text_types: Annotated[
        Optional[List[str]],
        Field(
            default=None,
            title="Custom Text Types",
            description=(
                "A list of file types to be processed. "
                "Default is autogen.retrieve_utils.TEXT_FORMATS. "
                "This only applies to files under the directories in "
                "docs_path. Explicitly included files and urls will be "
                "chunked regardless of their types."
            ),
        ),
    ]
    recursive: Annotated[
        bool,
        Field(
            default=True,
            title="Recursive",
            description=(
                "Whether to search documents recursively in the docs_path. "
                "Default is True."
            ),
        ),
    ]
    distance_threshold: Annotated[
        float,
        Field(
            default=-1,
            title="Distance Threshold",
            description=(
                "The threshold for the distance score, only distance"
                " smaller than this will be returned. "
                "Will be ignored if < 0. Default is -1."
            ),
        ),
    ]
    n_results: Annotated[
        Optional[int],
        Field(
            default=None,
            title="Number of Results",
            description=(
                "The number of results to return. Default is None, "
                "which will return all. "
                "Use None or <1 to return all results."
            ),
        ),
    ]

    # Validated function sources, filled in by the validate_custom_*
    # methods and exposed through the read-only properties below.
    _embedding_function_string: Optional[str] = None

    _token_count_function_string: Optional[str] = None

    _text_split_function_string: Optional[str] = None

    @property
    def embedding_function_string(self) -> Optional[str]:
        """Get the embedding function string.

        Returns
        -------
        Optional[str]
            The embedding function string.
        """
        return self._embedding_function_string

    @property
    def token_count_function_string(self) -> Optional[str]:
        """Get the token count function string.

        Returns
        -------
        Optional[str]
            The token count function string.
        """
        return self._token_count_function_string

    @property
    def text_split_function_string(self) -> Optional[str]:
        """Get the text split function string.

        Returns
        -------
        Optional[str]
            The text split function string.
        """
        return self._text_split_function_string

    @staticmethod
    def _resolve_custom_function(
        enabled: bool,
        function_body: Optional[str],
        field_name: str,
        flag_name: str,
        function_name: WaldiezMethodName,
    ) -> Optional[str]:
        """Check one optional custom function and return its content.

        Parameters
        ----------
        enabled : bool
            The value of the related ``use_custom_*`` flag.
        function_body : Optional[str]
            The user provided function source.
        field_name : str
            The name of the field holding the source (for the error message).
        flag_name : str
            The name of the related flag (for the error message).
        function_name : WaldiezMethodName
            The function name that is passed to ``check_function``.

        Returns
        -------
        Optional[str]
            None if the flag is off, else the content returned by
            ``check_function``.

        Raises
        ------
        ValueError
            If the flag is on and the source is missing or
            ``check_function`` reports it as not valid.
        """
        if not enabled:
            return None
        if not function_body:
            raise ValueError(
                f"The {field_name} is required if {flag_name} is True."
            )
        valid, error_or_content = check_function(function_body, function_name)
        if not valid:
            # on failure the second item is the error message
            raise ValueError(error_or_content)
        return error_or_content

    def validate_custom_embedding_function(self) -> None:
        """Validate the custom embedding function.

        Stores the validated content so that it is available through
        ``embedding_function_string``. If ``use_custom_embedding`` is
        False, the stored content is reset to None.

        Raises
        ------
        ValueError
            If the validation fails.
        """
        self._embedding_function_string = self._resolve_custom_function(
            self.use_custom_embedding,
            self.embedding_function,
            "embedding_function",
            "use_custom_embedding",
            "custom_embedding_function",
        )

    def validate_custom_token_count_function(self) -> None:
        """Validate the custom token count function.

        Stores the validated content so that it is available through
        ``token_count_function_string``. If ``use_custom_token_count`` is
        False, the stored content is reset to None.

        Raises
        ------
        ValueError
            If the validation fails.
        """
        self._token_count_function_string = self._resolve_custom_function(
            self.use_custom_token_count,
            self.custom_token_count_function,
            "custom_token_count_function",
            "use_custom_token_count",
            "custom_token_count_function",
        )

    def validate_custom_text_split_function(self) -> None:
        """Validate the custom text split function.

        Stores the validated content so that it is available through
        ``text_split_function_string``. If ``use_custom_text_split`` is
        False, the stored content is reset to None.

        Raises
        ------
        ValueError
            If the validation fails.
        """
        self._text_split_function_string = self._resolve_custom_function(
            self.use_custom_text_split,
            self.custom_text_split_function,
            "custom_text_split_function",
            "use_custom_text_split",
            "custom_text_split_function",
        )

    @model_validator(mode="after")
    def validate_rag_user_data(self) -> Self:
        """Validate the RAG user data.

        Checks the optional custom functions, fills in the default
        embedding model for the selected vector db if ``db_config`` has
        none, and normalizes ``n_results`` values below 1 to None.

        Raises
        ------
        ValueError
            If the validation fails.

        Returns
        -------
        WaldiezRagUserRetrieveConfig
            The validated retrieve config.
        """
        self.validate_custom_embedding_function()
        self.validate_custom_token_count_function()
        self.validate_custom_text_split_function()
        if not self.db_config.model:
            self.db_config.model = WaldiezRagUserModels[self.vector_db]
        # a value below 1 means "no limit", same as None
        if isinstance(self.n_results, int) and self.n_results < 1:
            self.n_results = None
        return self