unique_toolkit 1.3.3__tar.gz → 1.4.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144) hide show
  1. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/CHANGELOG.md +6 -0
  2. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/PKG-INFO +7 -1
  3. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/pyproject.toml +1 -1
  4. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/evaluation/schemas.py +1 -0
  5. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/tools/a2a/config.py +6 -0
  6. unique_toolkit-1.4.1/unique_toolkit/agentic/tools/a2a/evaluation/__init__.py +6 -0
  7. unique_toolkit-1.4.1/unique_toolkit/agentic/tools/a2a/evaluation/config.py +38 -0
  8. unique_toolkit-1.4.1/unique_toolkit/agentic/tools/a2a/evaluation/evaluator.py +209 -0
  9. unique_toolkit-1.4.1/unique_toolkit/agentic/tools/a2a/evaluation/summarization_user_message.j2 +9 -0
  10. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/tools/a2a/service.py +6 -1
  11. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/LICENSE +0 -0
  12. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/README.md +0 -0
  13. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/__init__.py +0 -0
  14. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/_common/_base_service.py +0 -0
  15. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/_common/_time_utils.py +0 -0
  16. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/_common/api_calling/human_verification_manager.py +0 -0
  17. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/_common/base_model_type_attribute.py +0 -0
  18. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/_common/chunk_relevancy_sorter/config.py +0 -0
  19. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/_common/chunk_relevancy_sorter/exception.py +0 -0
  20. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/_common/chunk_relevancy_sorter/schemas.py +0 -0
  21. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/_common/chunk_relevancy_sorter/service.py +0 -0
  22. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/_common/chunk_relevancy_sorter/tests/test_service.py +0 -0
  23. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/_common/default_language_model.py +0 -0
  24. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/_common/endpoint_builder.py +0 -0
  25. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/_common/endpoint_requestor.py +0 -0
  26. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/_common/exception.py +0 -0
  27. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/_common/feature_flags/schema.py +0 -0
  28. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/_common/pydantic/rjsf_tags.py +0 -0
  29. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/_common/pydantic_helpers.py +0 -0
  30. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/_common/string_utilities.py +0 -0
  31. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/_common/token/image_token_counting.py +0 -0
  32. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/_common/token/token_counting.py +0 -0
  33. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/_common/utils/__init__.py +0 -0
  34. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/_common/utils/structured_output/__init__.py +0 -0
  35. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/_common/utils/structured_output/schema.py +0 -0
  36. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/_common/utils/write_configuration.py +0 -0
  37. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/_common/validate_required_values.py +0 -0
  38. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/_common/validators.py +0 -0
  39. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/__init__.py +0 -0
  40. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/debug_info_manager/debug_info_manager.py +0 -0
  41. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/evaluation/config.py +0 -0
  42. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/evaluation/context_relevancy/prompts.py +0 -0
  43. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/evaluation/context_relevancy/schema.py +0 -0
  44. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/evaluation/context_relevancy/service.py +0 -0
  45. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/evaluation/evaluation_manager.py +0 -0
  46. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/evaluation/exception.py +0 -0
  47. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/evaluation/hallucination/constants.py +0 -0
  48. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/evaluation/hallucination/hallucination_evaluation.py +0 -0
  49. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/evaluation/hallucination/prompts.py +0 -0
  50. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/evaluation/hallucination/service.py +0 -0
  51. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/evaluation/hallucination/utils.py +0 -0
  52. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/evaluation/output_parser.py +0 -0
  53. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/evaluation/tests/test_context_relevancy_service.py +0 -0
  54. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/evaluation/tests/test_output_parser.py +0 -0
  55. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/history_manager/history_construction_with_contents.py +0 -0
  56. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/history_manager/history_manager.py +0 -0
  57. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/history_manager/loop_token_reducer.py +0 -0
  58. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/history_manager/utils.py +0 -0
  59. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/postprocessor/postprocessor_manager.py +0 -0
  60. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/reference_manager/reference_manager.py +0 -0
  61. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/short_term_memory_manager/persistent_short_term_memory_manager.py +0 -0
  62. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/thinking_manager/thinking_manager.py +0 -0
  63. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/tools/__init__.py +0 -0
  64. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/tools/a2a/__init__.py +0 -0
  65. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/tools/a2a/manager.py +0 -0
  66. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/tools/a2a/memory.py +0 -0
  67. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/tools/a2a/postprocessing/__init__.py +0 -0
  68. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/tools/a2a/postprocessing/display.py +0 -0
  69. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/tools/a2a/postprocessing/postprocessor.py +0 -0
  70. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/tools/a2a/postprocessing/test/test_display.py +0 -0
  71. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/tools/a2a/schema.py +0 -0
  72. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/tools/agent_chunks_hanlder.py +0 -0
  73. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/tools/config.py +0 -0
  74. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/tools/factory.py +0 -0
  75. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/tools/mcp/__init__.py +0 -0
  76. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/tools/mcp/manager.py +0 -0
  77. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/tools/mcp/models.py +0 -0
  78. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/tools/mcp/tool_wrapper.py +0 -0
  79. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/tools/schemas.py +0 -0
  80. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/tools/test/test_mcp_manager.py +0 -0
  81. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/tools/test/test_tool_progress_reporter.py +0 -0
  82. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/tools/tool.py +0 -0
  83. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/tools/tool_manager.py +0 -0
  84. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/tools/tool_progress_reporter.py +0 -0
  85. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/tools/utils/__init__.py +0 -0
  86. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/tools/utils/execution/__init__.py +0 -0
  87. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/tools/utils/execution/execution.py +0 -0
  88. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/tools/utils/source_handling/__init__.py +0 -0
  89. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/tools/utils/source_handling/schema.py +0 -0
  90. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/tools/utils/source_handling/source_formatting.py +0 -0
  91. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/agentic/tools/utils/source_handling/tests/test_source_formatting.py +0 -0
  92. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/app/__init__.py +0 -0
  93. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/app/dev_util.py +0 -0
  94. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/app/init_logging.py +0 -0
  95. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/app/init_sdk.py +0 -0
  96. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/app/performance/async_tasks.py +0 -0
  97. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/app/performance/async_wrapper.py +0 -0
  98. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/app/schemas.py +0 -0
  99. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/app/unique_settings.py +0 -0
  100. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/app/verification.py +0 -0
  101. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/chat/__init__.py +0 -0
  102. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/chat/constants.py +0 -0
  103. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/chat/functions.py +0 -0
  104. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/chat/schemas.py +0 -0
  105. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/chat/service.py +0 -0
  106. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/chat/state.py +0 -0
  107. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/chat/utils.py +0 -0
  108. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/content/__init__.py +0 -0
  109. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/content/constants.py +0 -0
  110. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/content/functions.py +0 -0
  111. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/content/schemas.py +0 -0
  112. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/content/service.py +0 -0
  113. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/content/utils.py +0 -0
  114. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/embedding/__init__.py +0 -0
  115. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/embedding/constants.py +0 -0
  116. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/embedding/functions.py +0 -0
  117. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/embedding/schemas.py +0 -0
  118. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/embedding/service.py +0 -0
  119. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/embedding/utils.py +0 -0
  120. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/framework_utilities/__init__.py +0 -0
  121. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/framework_utilities/langchain/client.py +0 -0
  122. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/framework_utilities/langchain/history.py +0 -0
  123. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/framework_utilities/openai/__init__.py +0 -0
  124. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/framework_utilities/openai/client.py +0 -0
  125. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/framework_utilities/openai/message_builder.py +0 -0
  126. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/framework_utilities/utils.py +0 -0
  127. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/language_model/__init__.py +0 -0
  128. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/language_model/builder.py +0 -0
  129. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/language_model/constants.py +0 -0
  130. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/language_model/functions.py +0 -0
  131. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/language_model/infos.py +0 -0
  132. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/language_model/prompt.py +0 -0
  133. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/language_model/reference.py +0 -0
  134. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/language_model/schemas.py +0 -0
  135. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/language_model/service.py +0 -0
  136. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/language_model/utils.py +0 -0
  137. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/protocols/support.py +0 -0
  138. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/short_term_memory/__init__.py +0 -0
  139. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/short_term_memory/constants.py +0 -0
  140. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/short_term_memory/functions.py +0 -0
  141. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/short_term_memory/schemas.py +0 -0
  142. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/short_term_memory/service.py +0 -0
  143. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/smart_rules/__init__.py +0 -0
  144. {unique_toolkit-1.3.3 → unique_toolkit-1.4.1}/unique_toolkit/smart_rules/compile.py +0 -0
@@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [1.4.1] - 2025-09-30
9
+ - Handle sub agent failed assessments better in sub agent evaluator.
10
+
11
+ ## [1.4.0] - 2025-09-29
12
+ - Add ability to consolidate sub agent's assessments.
13
+
8
14
  ## [1.3.3] - 2025-09-30
9
15
  - fix bug in exclusive tools not making them selectable
10
16
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: unique_toolkit
3
- Version: 1.3.3
3
+ Version: 1.4.1
4
4
  Summary:
5
5
  License: Proprietary
6
6
  Author: Cedric Klinkert
@@ -118,6 +118,12 @@ All notable changes to this project will be documented in this file.
118
118
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
119
119
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
120
120
 
121
+ ## [1.4.1] - 2025-09-30
122
+ - Handle sub agent failed assessments better in sub agent evaluator.
123
+
124
+ ## [1.4.0] - 2025-09-29
125
+ - Add ability to consolidate sub agent's assessments.
126
+
121
127
  ## [1.3.3] - 2025-09-30
122
128
  - fix bug in exclusive tools not making them selectable
123
129
 
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "unique_toolkit"
3
- version = "1.3.3"
3
+ version = "1.4.1"
4
4
  description = ""
5
5
  authors = [
6
6
  "Cedric Klinkert <cedric.klinkert@unique.ch>",
@@ -15,6 +15,7 @@ from unique_toolkit.chat.schemas import (
15
15
  class EvaluationMetricName(StrEnum):
16
16
  HALLUCINATION = "hallucination"
17
17
  CONTEXT_RELEVANCY = "relevancy"
18
+ SUB_AGENT = "sub_agent"
18
19
 
19
20
 
20
21
  class EvaluationMetricInputFieldName(StrEnum):
@@ -23,6 +23,11 @@ class SubAgentToolDisplayConfig(BaseModel):
23
23
  remove_from_history: bool = True
24
24
 
25
25
 
26
+ class SubAgentEvaluationConfig(BaseModel):
27
+ model_config = get_configuration_dict()
28
+ display_evalution: bool = True
29
+
30
+
26
31
  class SubAgentToolConfig(BaseToolConfig):
27
32
  model_config = get_configuration_dict()
28
33
 
@@ -43,3 +48,4 @@ class SubAgentToolConfig(BaseToolConfig):
43
48
  max_wait: float = 120.0
44
49
 
45
50
  response_display_config: SubAgentToolDisplayConfig = SubAgentToolDisplayConfig()
51
+ evaluation_config: SubAgentEvaluationConfig = SubAgentEvaluationConfig()
@@ -0,0 +1,6 @@
1
+ from unique_toolkit.agentic.tools.a2a.evaluation.config import SubAgentEvaluationConfig
2
+ from unique_toolkit.agentic.tools.a2a.evaluation.evaluator import (
3
+ SubAgentsEvaluation,
4
+ )
5
+
6
+ __all__ = ["SubAgentsEvaluation", "SubAgentEvaluationConfig"]
@@ -0,0 +1,38 @@
1
+ from pathlib import Path
2
+
3
+ from pydantic import BaseModel, Field
4
+
5
+ from unique_toolkit._common.default_language_model import DEFAULT_GPT_4o
6
+ from unique_toolkit._common.validators import LMI, get_LMI_default_field
7
+ from unique_toolkit.chat.schemas import (
8
+ ChatMessageAssessmentType,
9
+ )
10
+
11
# Default system prompt for the model that summarizes sub agent assessments.
DEFAULT_EVALUATION_SYSTEM_MESSAGE_TEMPLATE = """
You are a through and precise summarization model.
You will receive a list of "assessments" of one or more agent(s) response(s).
Your task is to give a brief summary (1-10 sentences) of the received assessments, following the following guidelines:
1. You must NOT in ANY case state a fact that is not stated in the given assessments.
2. You must focus first and foremost on the failing assessments, labeled `RED` below.
3. You must mention each agent's name when summarizing its list of assessments.
""".strip()

# The default user message is a Jinja2 template stored next to this module.
# Decode it explicitly as UTF-8 so the loaded text does not depend on the
# locale encoding of the machine importing the package.
with open(
    Path(__file__).parent / "summarization_user_message.j2", "r", encoding="utf-8"
) as file:
    DEFAULT_SUMMARIZATION_USER_MESSAGE_TEMPLATE = file.read().strip()
22
+
23
+
24
class SubAgentEvaluationConfig(BaseModel):
    # Settings for the evaluation that consolidates sub agent assessments.

    # Assessment type reported for the consolidated result.
    assessment_type: ChatMessageAssessmentType = Field(
        default=ChatMessageAssessmentType.COMPLIANCE,
        description="The type of assessment to use in the display.",
    )

    # Language model used to summarize the collected assessments
    # (defaults to DEFAULT_GPT_4o).
    summarization_model: LMI = get_LMI_default_field(DEFAULT_GPT_4o)
    # System prompt given to the summarization model.
    summarization_system_message: str = Field(
        default=DEFAULT_EVALUATION_SYSTEM_MESSAGE_TEMPLATE,
        description="The system message template for the summarization model.",
    )
    # User prompt template; the default is read from summarization_user_message.j2.
    summarization_user_message_template: str = Field(
        default=DEFAULT_SUMMARIZATION_USER_MESSAGE_TEMPLATE,
        description="The user message template for the summarization model.",
    )
@@ -0,0 +1,209 @@
1
+ import logging
2
+ from collections import defaultdict
3
+ from typing import override
4
+
5
+ import unique_sdk
6
+ from jinja2 import Template
7
+ from typing_extensions import TypedDict
8
+
9
+ from unique_toolkit.agentic.evaluation.evaluation_manager import Evaluation
10
+ from unique_toolkit.agentic.evaluation.schemas import (
11
+ EvaluationAssessmentMessage,
12
+ EvaluationMetricName,
13
+ EvaluationMetricResult,
14
+ )
15
+ from unique_toolkit.agentic.tools.a2a.evaluation.config import SubAgentEvaluationConfig
16
+ from unique_toolkit.agentic.tools.a2a.service import SubAgentTool
17
+ from unique_toolkit.chat.schemas import (
18
+ ChatMessageAssessmentLabel,
19
+ ChatMessageAssessmentStatus,
20
+ ChatMessageAssessmentType,
21
+ )
22
+ from unique_toolkit.language_model.builder import MessagesBuilder
23
+ from unique_toolkit.language_model.schemas import LanguageModelStreamResponse
24
+ from unique_toolkit.language_model.service import LanguageModelService
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+
29
class _SubAgentToolInfo(TypedDict):
    """State the evaluator keeps for one sub agent, keyed by assistant id."""

    # Assessments copied from the sub agent's response; None when none were stored.
    assessment: list[unique_sdk.Space.Assessment] | None
    # Name of the sub agent as returned by the tool's display_name().
    display_name: str
32
+
33
+
34
# Sentinel metric value used when no sub agent produced a valid assessment.
NO_ASSESSMENTS_FOUND = "NO_ASSESSMENTS_FOUND"
35
+
36
+
37
class SubAgentsEvaluation(Evaluation):
    """Consolidate the assessments returned by sub agents into a single result.

    The evaluation subscribes to every sub agent tool whose evaluation config
    enables display, stores the assessments of each response it is notified
    about, and reduces them to the worst label found. When more than one
    assessment has to be reported, a language model writes the explanation.
    """

    DISPLAY_NAME = "Sub Agents"

    def __init__(
        self,
        config: SubAgentEvaluationConfig,
        sub_agent_tools: list[SubAgentTool],
        language_model_service: LanguageModelService,
    ):
        """Register with the sub agent tools that should be evaluated.

        Args:
            config: Assessment type and summarization settings.
            sub_agent_tools: Candidate tools; only those whose
                ``evaluation_config.display_evalution`` is truthy are tracked.
            language_model_service: Service used to summarize assessments.
        """
        super().__init__(EvaluationMetricName.SUB_AGENT)
        self._config = config

        self._assistant_id_to_tool_info: dict[str, _SubAgentToolInfo] = {}
        self._language_model_service = language_model_service

        for sub_agent_tool in sub_agent_tools:
            if sub_agent_tool.config.evaluation_config.display_evalution:
                sub_agent_tool.subscribe(self)
                self._assistant_id_to_tool_info[sub_agent_tool.config.assistant_id] = {
                    "assessment": None,
                    "display_name": sub_agent_tool.display_name(),
                }

    @override
    def get_assessment_type(self) -> ChatMessageAssessmentType:
        """Return the assessment type configured for this evaluation."""
        return self._config.assessment_type

    @override
    async def run(
        self, loop_response: LanguageModelStreamResponse
    ) -> EvaluationMetricResult:
        """Reduce the stored sub agent assessments to one metric result.

        Args:
            loop_response: Not used by this evaluation.

        Returns:
            A result whose value is the worst valid label found (starting from
            GREEN), or ``NO_ASSESSMENTS_FOUND`` when no sub agent has a valid
            assessment.
        """
        logger.info("Running sub agents evaluation")

        sub_agents_display_data = []

        value = ChatMessageAssessmentLabel.GREEN

        # Use a dict in order to compare labels (RED being the worst)
        label_comparison_dict = defaultdict(
            lambda: 3
        )  # Unknown labels are highest in the sorting
        label_comparison_dict[ChatMessageAssessmentLabel.GREEN] = 2
        label_comparison_dict[ChatMessageAssessmentLabel.YELLOW] = 1
        label_comparison_dict[ChatMessageAssessmentLabel.RED] = 0

        for assistant_id, tool_info in self._assistant_id_to_tool_info.items():
            assessments = tool_info["assessment"] or []
            valid_assessments = []
            for assessment in assessments:
                if (
                    assessment["label"] is None
                    or assessment["label"] not in ChatMessageAssessmentLabel
                ):
                    logger.warning(
                        "Unkown assistant label %s for assistant %s will be ignored",
                        assessment["label"],
                        assistant_id,
                    )
                    continue
                if assessment["status"] != ChatMessageAssessmentStatus.DONE:
                    # The message has three placeholders, so the status must be
                    # passed as well; otherwise formatting fails when the
                    # record is emitted and the warning is lost.
                    logger.warning(
                        "Assessment %s for assistant %s is not done (status: %s) will be ignored",
                        assessment["label"],
                        assistant_id,
                        assessment["status"],
                    )
                    continue
                valid_assessments.append(assessment)

            if not valid_assessments:
                logger.info("No valid assessment found for assistant %s", assistant_id)
                continue

            # Worst labels first, so failing assessments lead the summary input.
            assessments = sorted(
                valid_assessments, key=lambda x: label_comparison_dict[x["label"]]
            )

            for assessment in assessments:
                value = min(
                    value, assessment["label"], key=lambda x: label_comparison_dict[x]
                )

            sub_agents_display_data.append(
                {
                    "name": tool_info["display_name"],
                    "assessments": assessments,
                }
            )

        if not sub_agents_display_data:
            logger.warning("No valid sub agent assessments found")
            return EvaluationMetricResult(
                name=self.get_name(),
                value=NO_ASSESSMENTS_FOUND,
                reason="No sub agents assessments found",
            )

        should_summarize = False
        reason = ""

        if len(sub_agents_display_data) > 1:
            should_summarize = True
        elif len(sub_agents_display_data) == 1:
            if len(sub_agents_display_data[0]["assessments"]) > 1:
                should_summarize = True
            else:
                # A single assessment needs no summary: reuse its explanation.
                reason = (
                    sub_agents_display_data[0]["assessments"][0]["explanation"] or ""
                )

        if should_summarize:
            messages = (
                MessagesBuilder()
                .system_message_append(self._config.summarization_system_message)
                .user_message_append(
                    Template(self._config.summarization_user_message_template).render(
                        sub_agents=sub_agents_display_data,
                    )
                )
                .build()
            )

            response = await self._language_model_service.complete_async(
                messages=messages,
                model_name=self._config.summarization_model.name,
                temperature=0.0,
            )
            reason = str(response.choices[0].message.content)

        return EvaluationMetricResult(
            name=self.get_name(),
            value=value,
            reason=reason,
            is_positive=value == ChatMessageAssessmentLabel.GREEN,
        )

    @override
    async def evaluation_metric_to_assessment(
        self, evaluation_result: EvaluationMetricResult
    ) -> EvaluationAssessmentMessage:
        """Convert a result of ``run`` into the assessment message to display.

        A ``NO_ASSESSMENTS_FOUND`` result is reported as a GREEN assessment
        with a fixed explanation; any other result uses its own value as the
        label and its reason as the explanation.
        """
        if evaluation_result.value == NO_ASSESSMENTS_FOUND:
            return EvaluationAssessmentMessage(
                status=ChatMessageAssessmentStatus.DONE,
                explanation="No valid sub agents assessments found to consolidate.",
                title=self.DISPLAY_NAME,
                label=ChatMessageAssessmentLabel.GREEN,
                type=self.get_assessment_type(),
            )

        return EvaluationAssessmentMessage(
            status=ChatMessageAssessmentStatus.DONE,
            explanation=evaluation_result.reason,
            title=self.DISPLAY_NAME,
            label=evaluation_result.value,  # type: ignore
            type=self.get_assessment_type(),
        )

    def notify_sub_agent_response(
        self, sub_agent_assistant_id: str, response: unique_sdk.Space.Message
    ) -> None:
        """Store the assessments of a sub agent response for the next ``run``.

        Responses from assistants that were not registered in ``__init__`` are
        logged and ignored. A later response from the same assistant replaces
        the assessments stored before.
        """
        if sub_agent_assistant_id not in self._assistant_id_to_tool_info:
            logger.warning(
                "Unknown assistant id %s received, assessment will be ignored.",
                sub_agent_assistant_id,
            )
            return

        self._assistant_id_to_tool_info[sub_agent_assistant_id]["assessment"] = (
            response[
                "assessment"
            ].copy()  # Shallow copy as we don't modify individual assessments
            if response["assessment"] is not None
            else None
        )
@@ -0,0 +1,9 @@
1
+ Here are the sub_agent(s) assessments:
2
+
3
+ {% for sub_agent in sub_agents %}
4
+ - Agent name: {{ sub_agent.name }}
5
+ {% for assessment in sub_agent.assessments %}
6
+ - {{ assessment.title }}: {{ assessment.label }}
7
+ {{ assessment.explanation }}
8
+ {% endfor %}
9
+ {% endfor %}
@@ -64,6 +64,7 @@ class SubAgentTool(Tool[SubAgentToolConfig]):
64
64
  assistant_id=self.config.assistant_id,
65
65
  )
66
66
  self._subscribers: list[SubAgentResponseSubscriber] = []
67
+ self._should_run_evaluation = False
67
68
 
68
69
  def display_name(self) -> str:
69
70
  return self._display_name
@@ -99,7 +100,7 @@ class SubAgentTool(Tool[SubAgentToolConfig]):
99
100
  return self.config.tool_format_information_for_user_prompt
100
101
 
101
102
  def evaluation_check_list(self) -> list[EvaluationMetricName]:
102
- return []
103
+ return [EvaluationMetricName.SUB_AGENT] if self._should_run_evaluation else []
103
104
 
104
105
  def get_evaluation_checks_based_on_tool_response(
105
106
  self,
@@ -194,6 +195,10 @@ class SubAgentTool(Tool[SubAgentToolConfig]):
194
195
  tool_call=tool_call,
195
196
  )
196
197
 
198
+ self._should_run_evaluation = (
199
+ response["assessment"] is not None and len(response["assessment"]) > 0
200
+ )
201
+
197
202
  self._notify_subscribers(response)
198
203
 
199
204
  if chat_id is None and self.config.reuse_chat:
File without changes
File without changes