unique_toolkit 0.8.18__tar.gz → 0.8.19__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/CHANGELOG.md +4 -0
  2. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/PKG-INFO +5 -1
  3. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/pyproject.toml +2 -2
  4. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/_common/default_language_model.py +1 -1
  5. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/_common/token/token_counting.py +9 -20
  6. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/_common/validators.py +2 -3
  7. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/debug_info_manager/debug_info_manager.py +0 -1
  8. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/evals/config.py +0 -1
  9. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/evals/context_relevancy/schema.py +1 -6
  10. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/evals/evaluation_manager.py +10 -12
  11. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/evals/hallucination/constants.py +0 -1
  12. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/evals/hallucination/hallucination_evaluation.py +5 -8
  13. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/evals/hallucination/service.py +0 -1
  14. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/evals/hallucination/utils.py +7 -8
  15. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/evals/schemas.py +2 -1
  16. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/evals/tests/test_context_relevancy_service.py +10 -9
  17. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/evals/tests/test_output_parser.py +8 -4
  18. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/history_manager/history_construction_with_contents.py +10 -20
  19. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/history_manager/history_manager.py +11 -22
  20. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/history_manager/loop_token_reducer.py +121 -109
  21. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/history_manager/utils.py +0 -1
  22. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/language_model/infos.py +1 -1
  23. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/language_model/schemas.py +0 -1
  24. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/postprocessor/postprocessor_manager.py +1 -3
  25. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/reference_manager/reference_manager.py +3 -4
  26. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/short_term_memory/persistent_short_term_memory_manager.py +2 -1
  27. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/thinking_manager/thinking_manager.py +2 -1
  28. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/tools/config.py +4 -5
  29. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/tools/factory.py +2 -8
  30. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/tools/test/test_tool_progress_reporter.py +1 -0
  31. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/tools/tool.py +3 -7
  32. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/tools/tool_manager.py +3 -2
  33. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/tools/tool_progress_reporter.py +1 -0
  34. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/tools/utils/source_handling/schema.py +0 -1
  35. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/tools/utils/source_handling/tests/test_source_formatting.py +1 -0
  36. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/LICENSE +0 -0
  37. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/README.md +0 -0
  38. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/__init__.py +0 -0
  39. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/_common/_base_service.py +0 -0
  40. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/_common/_time_utils.py +0 -0
  41. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/_common/exception.py +0 -0
  42. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/_common/token/image_token_counting.py +0 -0
  43. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/_common/validate_required_values.py +0 -0
  44. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/app/__init__.py +0 -0
  45. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/app/dev_util.py +0 -0
  46. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/app/init_logging.py +0 -0
  47. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/app/init_sdk.py +0 -0
  48. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/app/performance/async_tasks.py +0 -0
  49. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/app/performance/async_wrapper.py +0 -0
  50. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/app/schemas.py +0 -0
  51. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/app/unique_settings.py +0 -0
  52. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/app/verification.py +0 -0
  53. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/chat/__init__.py +0 -0
  54. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/chat/constants.py +0 -0
  55. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/chat/functions.py +0 -0
  56. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/chat/schemas.py +0 -0
  57. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/chat/service.py +0 -0
  58. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/chat/state.py +0 -0
  59. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/chat/utils.py +0 -0
  60. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/content/__init__.py +0 -0
  61. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/content/constants.py +0 -0
  62. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/content/functions.py +0 -0
  63. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/content/schemas.py +0 -0
  64. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/content/service.py +0 -0
  65. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/content/utils.py +0 -0
  66. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/embedding/__init__.py +0 -0
  67. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/embedding/constants.py +0 -0
  68. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/embedding/functions.py +0 -0
  69. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/embedding/schemas.py +0 -0
  70. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/embedding/service.py +0 -0
  71. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/embedding/utils.py +0 -0
  72. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/evals/context_relevancy/prompts.py +0 -0
  73. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/evals/context_relevancy/service.py +13 -13
  74. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/evals/exception.py +0 -0
  75. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/evals/hallucination/prompts.py +0 -0
  76. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/evals/output_parser.py +1 -1
  77. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/evaluators/__init__.py +0 -0
  78. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/evaluators/config.py +0 -0
  79. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/evaluators/constants.py +0 -0
  80. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/evaluators/context_relevancy/constants.py +0 -0
  81. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/evaluators/context_relevancy/prompts.py +0 -0
  82. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/evaluators/context_relevancy/service.py +0 -0
  83. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/evaluators/context_relevancy/utils.py +0 -0
  84. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/evaluators/exception.py +0 -0
  85. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/evaluators/hallucination/constants.py +0 -0
  86. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/evaluators/hallucination/prompts.py +0 -0
  87. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/evaluators/hallucination/service.py +0 -0
  88. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/evaluators/hallucination/utils.py +0 -0
  89. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/evaluators/output_parser.py +0 -0
  90. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/evaluators/schemas.py +0 -0
  91. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/framework_utilities/langchain/client.py +0 -0
  92. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/framework_utilities/langchain/history.py +0 -0
  93. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/framework_utilities/openai/client.py +0 -0
  94. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/framework_utilities/openai/message_builder.py +0 -0
  95. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/framework_utilities/utils.py +0 -0
  96. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/language_model/__init__.py +0 -0
  97. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/language_model/builder.py +0 -0
  98. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/language_model/constants.py +0 -0
  99. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/language_model/functions.py +0 -0
  100. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/language_model/prompt.py +0 -0
  101. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/language_model/reference.py +0 -0
  102. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/language_model/service.py +0 -0
  103. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/language_model/utils.py +0 -0
  104. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/protocols/support.py +0 -0
  105. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/short_term_memory/__init__.py +0 -0
  106. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/short_term_memory/constants.py +0 -0
  107. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/short_term_memory/functions.py +0 -0
  108. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/short_term_memory/schemas.py +0 -0
  109. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/short_term_memory/service.py +0 -0
  110. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/smart_rules/__init__.py +0 -0
  111. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/smart_rules/compile.py +0 -0
  112. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/tools/schemas.py +1 -1
  113. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/tools/utils/execution/execution.py +0 -0
  114. {unique_toolkit-0.8.18 → unique_toolkit-0.8.19}/unique_toolkit/tools/utils/source_handling/source_formatting.py +0 -0
@@ -5,6 +5,10 @@ All notable changes to this project will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+
9
+ ## [0.8.19] - 2025-08-24
10
+ - Enforce usage of ruff using pipeline
11
+
8
12
  ## [0.8.18] - 2025-08-22
9
13
  - moved class variables into instance variables
10
14
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: unique_toolkit
3
- Version: 0.8.18
3
+ Version: 0.8.19
4
4
  Summary:
5
5
  License: Proprietary
6
6
  Author: Martin Fadler
@@ -114,6 +114,10 @@ All notable changes to this project will be documented in this file.
114
114
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
115
115
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
116
116
 
117
+
118
+ ## [0.8.19] - 2025-08-24
119
+ - Enforce usage of ruff using pipeline
120
+
117
121
  ## [0.8.18] - 2025-08-22
118
122
  - moved class variables into instance variables
119
123
 
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "unique_toolkit"
3
- version = "0.8.18"
3
+ version = "0.8.19"
4
4
  description = ""
5
5
  authors = [
6
6
  "Martin Fadler <martin.fadler@unique.ch>",
@@ -34,7 +34,7 @@ langchain = "^0.3.27"
34
34
  langchain-openai = "^0.3.28"
35
35
 
36
36
  [tool.poetry.group.dev.dependencies]
37
- ruff = "0.11.7"
37
+ ruff = "0.12.10"
38
38
  pytest = "^7.4.3"
39
39
  tox = "^4.11.4"
40
40
  pyright = "^1.1.341"
@@ -3,4 +3,4 @@ from unique_toolkit.language_model.infos import LanguageModelName
3
3
  DEFAULT_GPT_35_TURBO = LanguageModelName.AZURE_GPT_35_TURBO_0125
4
4
  DEFAULT_GPT_4o = LanguageModelName.AZURE_GPT_4o_2024_1120
5
5
  DEFAULT_GPT_4o_STRUCTURED_OUTPUT = LanguageModelName.AZURE_GPT_4o_2024_0806
6
- DEFAULT_GPT_4o_MINI = LanguageModelName.AZURE_GPT_4o_MINI_2024_0718
6
+ DEFAULT_GPT_4o_MINI = LanguageModelName.AZURE_GPT_4o_MINI_2024_0718
@@ -5,7 +5,10 @@ import json
5
5
  from typing import Any, Callable
6
6
 
7
7
  from pydantic import BaseModel
8
- from unique_toolkit._common.token.image_token_counting import calculate_image_tokens_from_base64
8
+
9
+ from unique_toolkit._common.token.image_token_counting import (
10
+ calculate_image_tokens_from_base64,
11
+ )
9
12
  from unique_toolkit.language_model import (
10
13
  LanguageModelMessage,
11
14
  LanguageModelMessages,
@@ -13,8 +16,6 @@ from unique_toolkit.language_model import (
13
16
  )
14
17
 
15
18
 
16
-
17
-
18
19
  class SpecialToolCallingTokens(BaseModel):
19
20
  func_init: int = 0
20
21
  prop_init: int = 0
@@ -123,18 +124,14 @@ def num_tokens_for_tools(
123
124
  )
124
125
  )
125
126
  if len(function.get("parameters", {}).get("properties", "")) > 0:
126
- properties = function.get("parameters", {}).get(
127
- "properties", ""
128
- )
127
+ properties = function.get("parameters", {}).get("properties", "")
129
128
  func_token_count += special_token.prop_init
130
129
 
131
130
  for key in list(properties.keys()):
132
131
  func_token_count += special_token.prop_key
133
132
 
134
133
  if "enum" in properties[key].keys():
135
- func_token_count += num_token_function_enum(
136
- properties, encode
137
- )
134
+ func_token_count += num_token_function_enum(properties, encode)
138
135
 
139
136
  func_token_count += len(
140
137
  encode(
@@ -147,9 +144,7 @@ def num_tokens_for_tools(
147
144
  return func_token_count
148
145
 
149
146
 
150
- def handle_message_with_images(
151
- message: list[dict], encode: Callable[[str], list[int]]
152
- ):
147
+ def handle_message_with_images(message: list[dict], encode: Callable[[str], list[int]]):
153
148
  token_count = 0
154
149
  for item in message:
155
150
  if item.get("type") == "image_url":
@@ -168,11 +163,7 @@ def messages_to_openai_messages(
168
163
  messages = LanguageModelMessages(messages)
169
164
 
170
165
  return [
171
- {
172
- k: v
173
- for k, v in m.items()
174
- if (k in ["content", "role"] and v is not None)
175
- }
166
+ {k: v for k, v in m.items() if (k in ["content", "role"] and v is not None)}
176
167
  for m in json.loads(messages.model_dump_json())
177
168
  ]
178
169
 
@@ -190,6 +181,4 @@ def num_token_for_language_model_messages(
190
181
  messages: LanguageModelMessages | list[LanguageModelMessage],
191
182
  encode: Callable[[str], list[int]],
192
183
  ) -> int:
193
- return num_tokens_from_messages(
194
- messages_to_openai_messages(messages), encode
195
- )
184
+ return num_tokens_from_messages(messages_to_openai_messages(messages), encode)
@@ -2,6 +2,7 @@ import logging
2
2
  from typing import Annotated, Any
3
3
 
4
4
  from pydantic import BeforeValidator, Field, PlainSerializer, ValidationInfo
5
+ from pydantic.fields import FieldInfo
5
6
 
6
7
  from unique_toolkit.language_model import LanguageModelName
7
8
  from unique_toolkit.language_model.infos import (
@@ -9,9 +10,6 @@ from unique_toolkit.language_model.infos import (
9
10
  LanguageModelProvider,
10
11
  )
11
12
 
12
- from pydantic.fields import FieldInfo
13
-
14
-
15
13
  logger = logging.getLogger(__name__)
16
14
 
17
15
  # TODO @klcd: Inform on deprecation of str as input
@@ -28,6 +26,7 @@ LMI = Annotated[
28
26
  ),
29
27
  ]
30
28
 
29
+
31
30
  def get_LMI_default_field(llm_name: LanguageModelName, **kwargs) -> Any:
32
31
  return Field(
33
32
  default=LanguageModelInfo.from_name(llm_name),
@@ -1,4 +1,3 @@
1
- from unique_toolkit.content.schemas import ContentChunk, ContentReference
2
1
  from unique_toolkit.tools.schemas import ToolCallResponse
3
2
 
4
3
 
@@ -6,7 +6,6 @@ from pydantic import BaseModel, ConfigDict, Field
6
6
  from unique_toolkit._common.validators import LMI
7
7
  from unique_toolkit.language_model.infos import LanguageModelInfo, LanguageModelName
8
8
 
9
-
10
9
  from .schemas import (
11
10
  EvaluationMetricName,
12
11
  )
@@ -1,10 +1,6 @@
1
- from pydantic import BaseModel, Field, create_model
1
+ from pydantic import BaseModel, ConfigDict, Field, create_model
2
2
  from pydantic.json_schema import SkipJsonSchema
3
3
 
4
-
5
-
6
- from pydantic import BaseModel, ConfigDict
7
-
8
4
  from unique_toolkit.tools.config import get_configuration_dict
9
5
 
10
6
 
@@ -12,7 +8,6 @@ class StructuredOutputModel(BaseModel):
12
8
  model_config = ConfigDict(extra="forbid")
13
9
 
14
10
 
15
-
16
11
  class StructuredOutputConfig(BaseModel):
17
12
  model_config = get_configuration_dict()
18
13
 
@@ -1,33 +1,31 @@
1
- from abc import ABC
2
1
  import asyncio
3
-
4
-
5
- from unique_toolkit.tools.utils.execution.execution import Result, SafeTaskExecutor
2
+ from abc import ABC
6
3
  from logging import Logger
7
- from unique_toolkit.evals.schemas import (
8
- EvaluationAssessmentMessage,
9
- EvaluationMetricName,
10
- EvaluationMetricResult,
11
- )
4
+
12
5
  from unique_toolkit.chat.schemas import (
13
6
  ChatMessageAssessmentStatus,
14
7
  ChatMessageAssessmentType,
15
8
  )
16
9
  from unique_toolkit.chat.service import ChatService
10
+ from unique_toolkit.evals.schemas import (
11
+ EvaluationAssessmentMessage,
12
+ EvaluationMetricName,
13
+ EvaluationMetricResult,
14
+ )
17
15
  from unique_toolkit.language_model.schemas import (
18
16
  LanguageModelStreamResponse,
19
17
  )
18
+ from unique_toolkit.tools.utils.execution.execution import Result, SafeTaskExecutor
20
19
 
21
20
 
22
21
  class Evaluation(ABC):
23
22
  """
24
23
  Abstract base class for evaluation metrics.
25
-
24
+
26
25
  typical use cases include:
27
26
  - Hallucination checking
28
27
  - compliance checking
29
28
  """
30
-
31
29
 
32
30
  def __init__(self, name: EvaluationMetricName):
33
31
  self.name = name
@@ -73,8 +71,8 @@ class EvaluationManager:
73
71
 
74
72
  The EvaluationManager serves as the central hub for managing and executing evaluations.
75
73
  """
76
- # a hashmap to hold evaluations by their names
77
74
 
75
+ # a hashmap to hold evaluations by their names
78
76
 
79
77
  def __init__(
80
78
  self,
@@ -16,7 +16,6 @@ from unique_toolkit.evals.schemas import (
16
16
  )
17
17
  from unique_toolkit.language_model.infos import LanguageModelInfo, LanguageModelName
18
18
 
19
-
20
19
  SYSTEM_MSG_KEY = "systemPrompt"
21
20
  USER_MSG_KEY = "userPrompt"
22
21
  SYSTEM_MSG_DEFAULT_KEY = "systemPromptDefault"
@@ -1,5 +1,3 @@
1
- from typing import Any
2
-
3
1
  from unique_toolkit.app.schemas import ChatEvent
4
2
  from unique_toolkit.chat.schemas import (
5
3
  ChatMessageAssessmentLabel,
@@ -7,6 +5,9 @@ from unique_toolkit.chat.schemas import (
7
5
  ChatMessageAssessmentType,
8
6
  )
9
7
  from unique_toolkit.evals.evaluation_manager import Evaluation
8
+ from unique_toolkit.evals.hallucination.constants import (
9
+ HallucinationConfig,
10
+ )
10
11
  from unique_toolkit.evals.hallucination.utils import check_hallucination
11
12
  from unique_toolkit.evals.schemas import (
12
13
  EvaluationAssessmentMessage,
@@ -14,17 +15,13 @@ from unique_toolkit.evals.schemas import (
14
15
  EvaluationMetricName,
15
16
  EvaluationMetricResult,
16
17
  )
17
- from unique_toolkit.evals.hallucination.constants import (
18
- HallucinationConfig,
18
+ from unique_toolkit.language_model.schemas import (
19
+ LanguageModelStreamResponse,
19
20
  )
20
21
  from unique_toolkit.reference_manager.reference_manager import (
21
22
  ReferenceManager,
22
23
  )
23
24
 
24
- from unique_toolkit.language_model.schemas import (
25
- LanguageModelStreamResponse,
26
- )
27
-
28
25
 
29
26
  class HallucinationEvaluation(Evaluation):
30
27
  def __init__(
@@ -4,7 +4,6 @@ from unique_toolkit.app.schemas import ChatEvent
4
4
  from unique_toolkit.evals.config import EvaluationMetricConfig
5
5
  from unique_toolkit.evals.schemas import EvaluationMetricInput, EvaluationMetricResult
6
6
 
7
-
8
7
  from .constants import hallucination_metric_default_config
9
8
  from .utils import check_hallucination
10
9
 
@@ -2,13 +2,6 @@ import logging
2
2
  from string import Template
3
3
 
4
4
  from unique_toolkit.content.schemas import ContentChunk
5
- from unique_toolkit.language_model.schemas import (
6
- LanguageModelMessages,
7
- LanguageModelStreamResponse,
8
- LanguageModelSystemMessage,
9
- LanguageModelUserMessage,
10
- )
11
- from unique_toolkit.language_model.service import LanguageModelService
12
5
  from unique_toolkit.evals.config import EvaluationMetricConfig
13
6
  from unique_toolkit.evals.exception import EvaluatorException
14
7
  from unique_toolkit.evals.output_parser import parse_eval_metric_result
@@ -17,7 +10,13 @@ from unique_toolkit.evals.schemas import (
17
10
  EvaluationMetricName,
18
11
  EvaluationMetricResult,
19
12
  )
20
-
13
+ from unique_toolkit.language_model.schemas import (
14
+ LanguageModelMessages,
15
+ LanguageModelStreamResponse,
16
+ LanguageModelSystemMessage,
17
+ LanguageModelUserMessage,
18
+ )
19
+ from unique_toolkit.language_model.service import LanguageModelService
21
20
 
22
21
  from .constants import (
23
22
  SYSTEM_MSG_DEFAULT_KEY,
@@ -2,13 +2,14 @@ from enum import StrEnum
2
2
  from typing import Optional
3
3
 
4
4
  from pydantic import BaseModel, ConfigDict, Field
5
+
5
6
  from unique_toolkit.chat import ChatMessage
6
- from unique_toolkit.evals.exception import EvaluatorException
7
7
  from unique_toolkit.chat.schemas import (
8
8
  ChatMessageAssessmentLabel,
9
9
  ChatMessageAssessmentStatus,
10
10
  ChatMessageAssessmentType,
11
11
  )
12
+ from unique_toolkit.evals.exception import EvaluatorException
12
13
 
13
14
 
14
15
  class EvaluationMetricName(StrEnum):
@@ -1,17 +1,9 @@
1
1
  from unittest.mock import MagicMock, patch
2
2
 
3
3
  import pytest
4
+
4
5
  from unique_toolkit.app.schemas import ChatEvent
5
6
  from unique_toolkit.chat.service import LanguageModelName
6
- from unique_toolkit.language_model.infos import (
7
- LanguageModelInfo,
8
- )
9
- from unique_toolkit.language_model.schemas import (
10
- LanguageModelAssistantMessage,
11
- LanguageModelCompletionChoice,
12
- LanguageModelMessages,
13
- )
14
- from unique_toolkit.language_model.service import LanguageModelResponse
15
7
  from unique_toolkit.evals.config import EvaluationMetricConfig
16
8
  from unique_toolkit.evals.context_relevancy.prompts import (
17
9
  CONTEXT_RELEVANCY_METRIC_SYSTEM_MSG,
@@ -28,6 +20,15 @@ from unique_toolkit.evals.schemas import (
28
20
  EvaluationMetricName,
29
21
  EvaluationMetricResult,
30
22
  )
23
+ from unique_toolkit.language_model.infos import (
24
+ LanguageModelInfo,
25
+ )
26
+ from unique_toolkit.language_model.schemas import (
27
+ LanguageModelAssistantMessage,
28
+ LanguageModelCompletionChoice,
29
+ LanguageModelMessages,
30
+ )
31
+ from unique_toolkit.language_model.service import LanguageModelResponse
31
32
 
32
33
 
33
34
  @pytest.fixture
@@ -1,13 +1,17 @@
1
1
  import pytest
2
2
 
3
- from unique_toolkit.evals.context_relevancy.schema import EvaluationSchemaStructuredOutput, Fact
3
+ from unique_toolkit.evals.context_relevancy.schema import (
4
+ EvaluationSchemaStructuredOutput,
5
+ Fact,
6
+ )
4
7
  from unique_toolkit.evals.exception import EvaluatorException
5
- from unique_toolkit.evals.output_parser import parse_eval_metric_result, parse_eval_metric_result_structured_output
8
+ from unique_toolkit.evals.output_parser import (
9
+ parse_eval_metric_result,
10
+ parse_eval_metric_result_structured_output,
11
+ )
6
12
  from unique_toolkit.evals.schemas import EvaluationMetricName, EvaluationMetricResult
7
13
 
8
14
 
9
-
10
-
11
15
  def test_parse_eval_metric_result_success():
12
16
  # Test successful parsing with all fields
13
17
  result = '{"value": "high", "reason": "Test reason"}'
@@ -1,26 +1,24 @@
1
1
  import base64
2
2
  import mimetypes
3
-
4
3
  from datetime import datetime
5
4
  from enum import StrEnum
6
5
 
7
6
  import numpy as np
8
7
  import tiktoken
9
-
10
8
  from pydantic import RootModel
11
9
 
12
- from unique_toolkit._common.token.token_counting import num_tokens_per_language_model_message
13
- from unique_toolkit.chat.service import ChatService
14
- from unique_toolkit.content.service import ContentService
15
- from unique_toolkit.language_model.schemas import LanguageModelMessages
10
+ from unique_toolkit._common.token.token_counting import (
11
+ num_tokens_per_language_model_message,
12
+ )
16
13
  from unique_toolkit.app import ChatEventUserMessage
17
14
  from unique_toolkit.chat.schemas import ChatMessage
18
15
  from unique_toolkit.chat.schemas import ChatMessageRole as ChatRole
16
+ from unique_toolkit.chat.service import ChatService
19
17
  from unique_toolkit.content.schemas import Content
18
+ from unique_toolkit.content.service import ContentService
20
19
  from unique_toolkit.language_model import LanguageModelMessageRole as LLMRole
21
20
  from unique_toolkit.language_model.infos import EncoderName
22
-
23
-
21
+ from unique_toolkit.language_model.schemas import LanguageModelMessages
24
22
 
25
23
  # TODO: Test this once it moves into the unique toolkit
26
24
 
@@ -188,8 +186,7 @@ def file_content_serialization(
188
186
  return ""
189
187
  case FileContentSerialization.FILE_NAME:
190
188
  file_names = [
191
- f"- Uploaded file: {f.key} at {f.created_at}"
192
- for f in file_contents
189
+ f"- Uploaded file: {f.key} at {f.created_at}" for f in file_contents
193
190
  ]
194
191
  return "\n".join(
195
192
  [
@@ -226,12 +223,8 @@ def get_full_history_with_contents(
226
223
  text = ""
227
224
 
228
225
  if len(c.contents) > 0:
229
- file_contents = [
230
- co for co in c.contents if is_file_content(co.key)
231
- ]
232
- image_contents = [
233
- co for co in c.contents if is_image_content(co.key)
234
- ]
226
+ file_contents = [co for co in c.contents if is_file_content(co.key)]
227
+ image_contents = [co for co in c.contents if is_image_content(co.key)]
235
228
 
236
229
  content = (
237
230
  text
@@ -285,7 +278,6 @@ def get_full_history_as_llm_messages(
285
278
  return builder.build()
286
279
 
287
280
 
288
-
289
281
  def limit_to_token_window(
290
282
  messages: LanguageModelMessages,
291
283
  token_limit: int,
@@ -297,9 +289,7 @@ def limit_to_token_window(
297
289
  encode=encoder.encode,
298
290
  )
299
291
 
300
- to_take: list[bool] = (
301
- np.cumsum(token_per_message_reversed) < token_limit
302
- ).tolist()
292
+ to_take: list[bool] = (np.cumsum(token_per_message_reversed) < token_limit).tolist()
303
293
  to_take.reverse()
304
294
 
305
295
  return LanguageModelMessages(
@@ -3,32 +3,28 @@ from typing import Annotated, Awaitable, Callable
3
3
 
4
4
  from pydantic import BaseModel, Field
5
5
 
6
+ from unique_toolkit._common.validators import LMI
6
7
  from unique_toolkit.app.schemas import ChatEvent
7
-
8
-
9
-
8
+ from unique_toolkit.history_manager.loop_token_reducer import LoopTokenReducer
9
+ from unique_toolkit.history_manager.utils import transform_chunks_to_string
10
+ from unique_toolkit.language_model.infos import LanguageModelInfo, LanguageModelName
10
11
  from unique_toolkit.language_model.schemas import (
11
12
  LanguageModelAssistantMessage,
12
13
  LanguageModelFunction,
13
- LanguageModelMessage,
14
+ LanguageModelMessage,
14
15
  LanguageModelMessages,
15
- LanguageModelToolMessage
16
+ LanguageModelToolMessage,
16
17
  )
17
-
18
- from unique_toolkit.tools.schemas import ToolCallResponse
19
- from unique_toolkit.history_manager.utils import transform_chunks_to_string
20
-
21
- from unique_toolkit._common.validators import LMI
22
- from unique_toolkit.history_manager.loop_token_reducer import LoopTokenReducer
23
- from unique_toolkit.language_model.infos import LanguageModelInfo, LanguageModelName
24
18
  from unique_toolkit.reference_manager.reference_manager import ReferenceManager
25
19
  from unique_toolkit.tools.config import get_configuration_dict
20
+ from unique_toolkit.tools.schemas import ToolCallResponse
26
21
 
27
22
  DeactivatedNone = Annotated[
28
23
  None,
29
24
  Field(title="Deactivated", description="None"),
30
25
  ]
31
26
 
27
+
32
28
  class UploadedContentConfig(BaseModel):
33
29
  model_config = get_configuration_dict()
34
30
 
@@ -44,8 +40,8 @@ class UploadedContentConfig(BaseModel):
44
40
  description="The fraction of the max input tokens that will be reserved for the uploaded content.",
45
41
  )
46
42
 
47
- class ExperimentalFeatures(BaseModel):
48
43
 
44
+ class ExperimentalFeatures(BaseModel):
49
45
  full_sources_serialize_dump: bool = Field(
50
46
  default=False,
51
47
  description="If True, the sources will be serialized in full, otherwise only the content will be serialized.",
@@ -53,14 +49,11 @@ class ExperimentalFeatures(BaseModel):
53
49
 
54
50
 
55
51
  class HistoryManagerConfig(BaseModel):
56
-
57
-
58
52
  experimental_features: ExperimentalFeatures = Field(
59
53
  default=ExperimentalFeatures(),
60
54
  description="Experimental features for the history manager.",
61
55
  )
62
56
 
63
-
64
57
  percent_of_max_tokens_for_history: float = Field(
65
58
  default=0.2,
66
59
  ge=0.0,
@@ -88,7 +81,6 @@ class HistoryManagerConfig(BaseModel):
88
81
  ) = UploadedContentConfig()
89
82
 
90
83
 
91
-
92
84
  class HistoryManager:
93
85
  """
94
86
  Manages the history of tool calls and conversation loops.
@@ -110,7 +102,6 @@ class HistoryManager:
110
102
  The HistoryManager serves as the backbone for managing and retrieving conversation history in a structured and efficient manner.
111
103
  """
112
104
 
113
-
114
105
  def __init__(
115
106
  self,
116
107
  logger: Logger,
@@ -134,7 +125,6 @@ class HistoryManager:
134
125
  self._loop_history: list[LanguageModelMessage] = []
135
126
  self._source_enumerator = 0
136
127
 
137
-
138
128
  def has_no_loop_messages(self) -> bool:
139
129
  return len(self._loop_history) == 0
140
130
 
@@ -201,13 +191,12 @@ class HistoryManager:
201
191
  def add_assistant_message(self, message: LanguageModelAssistantMessage) -> None:
202
192
  self._loop_history.append(message)
203
193
 
204
-
205
194
  async def get_history_for_model_call(
206
195
  self,
207
196
  original_user_message: str,
208
197
  rendered_user_message_string: str,
209
198
  rendered_system_message_string: str,
210
- remove_from_text: Callable[[str], Awaitable[str]]
199
+ remove_from_text: Callable[[str], Awaitable[str]],
211
200
  ) -> LanguageModelMessages:
212
201
  self._logger.info("Getting history for model call -> ")
213
202
 
@@ -218,4 +207,4 @@ class HistoryManager:
218
207
  loop_history=self._loop_history,
219
208
  remove_from_text=remove_from_text,
220
209
  )
221
- return messages
210
+ return messages